* [Qemu-devel] [PATCH 1/8] iscsi: add logical block provisioning information to iscsilun
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:35 ` Paolo Bonzini
2013-06-22 20:58 ` [Qemu-devel] [PATCH 2/8] iscsi: add bdrv_co_is_allocated Peter Lieven
` (6 subsequent siblings)
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 0bbf0b1..a38a1bf 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,11 @@ typedef struct IscsiLun {
uint64_t num_blocks;
int events;
QEMUTimer *nop_timer;
+ uint8_t lbpme;
+ uint8_t lbprz;
+ uint8_t lbpu;
+ uint8_t lbpws;
+ uint8_t lbpws10;
} IscsiLun;
typedef struct IscsiAIOCB {
@@ -948,6 +953,8 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
} else {
iscsilun->block_size = rc16->block_length;
iscsilun->num_blocks = rc16->returned_lba + 1;
+ iscsilun->lbpme = rc16->lbpme;
+ iscsilun->lbprz = rc16->lbprz;
}
}
break;
@@ -1130,6 +1137,46 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
bs->sg = 1;
}
+ if (iscsilun->lbpme) {
+ struct scsi_inquiry_logical_block_provisioning *inq_lbp;
+ int full_size;
+
+ task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 1,
+ SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
+ 64);
+ if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+ error_report("iSCSI: Inquiry command failed : %s",
+ iscsi_get_error(iscsilun->iscsi));
+ ret = -EINVAL;
+ goto out;
+ }
+ full_size = scsi_datain_getfullsize(task);
+ if (full_size > task->datain.size) {
+ scsi_free_scsi_task(task);
+
+ /* we need more data for the full list */
+ task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 1,
+ SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
+ full_size);
+ if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+ error_report("iSCSI: Inquiry command failed : %s",
+ iscsi_get_error(iscsilun->iscsi));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ inq_lbp = scsi_datain_unmarshall(task);
+ if (inq_lbp == NULL) {
+ error_report("iSCSI: failed to unmarshall inquiry datain blob");
+ ret = -EINVAL;
+ goto out;
+ }
+ iscsilun->lbpu = inq_lbp->lbpu;
+ iscsilun->lbpws = inq_lbp->lbpws;
+ iscsilun->lbpws10 = inq_lbp->lbpws10;
+ }
+
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
/* Set up a timer for sending out iSCSI NOPs */
iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 1/8] iscsi: add logical block provisioning information to iscsilun
2013-06-22 20:58 ` [Qemu-devel] [PATCH 1/8] iscsi: add logical block provisioning information to iscsilun Peter Lieven
@ 2013-06-24 14:35 ` Paolo Bonzini
0 siblings, 0 replies; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:35 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/iscsi.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 47 insertions(+)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 0bbf0b1..a38a1bf 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -49,6 +49,11 @@ typedef struct IscsiLun {
> uint64_t num_blocks;
> int events;
> QEMUTimer *nop_timer;
> + uint8_t lbpme;
> + uint8_t lbprz;
> + uint8_t lbpu;
> + uint8_t lbpws;
> + uint8_t lbpws10;
> } IscsiLun;
>
> typedef struct IscsiAIOCB {
> @@ -948,6 +953,8 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
> } else {
> iscsilun->block_size = rc16->block_length;
> iscsilun->num_blocks = rc16->returned_lba + 1;
> + iscsilun->lbpme = rc16->lbpme;
> + iscsilun->lbprz = rc16->lbprz;
> }
> }
> break;
> @@ -1130,6 +1137,46 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
> bs->sg = 1;
> }
>
> + if (iscsilun->lbpme) {
> + struct scsi_inquiry_logical_block_provisioning *inq_lbp;
> + int full_size;
> +
> + task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 1,
> + SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
> + 64);
> + if (task == NULL || task->status != SCSI_STATUS_GOOD) {
> + error_report("iSCSI: Inquiry command failed : %s",
> + iscsi_get_error(iscsilun->iscsi));
> + ret = -EINVAL;
> + goto out;
> + }
> + full_size = scsi_datain_getfullsize(task);
> + if (full_size > task->datain.size) {
> + scsi_free_scsi_task(task);
> +
> + /* we need more data for the full list */
> + task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 1,
> + SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
> + full_size);
> + if (task == NULL || task->status != SCSI_STATUS_GOOD) {
> + error_report("iSCSI: Inquiry command failed : %s",
> + iscsi_get_error(iscsilun->iscsi));
> + ret = -EINVAL;
> + goto out;
> + }
> + }
> +
> + inq_lbp = scsi_datain_unmarshall(task);
> + if (inq_lbp == NULL) {
> + error_report("iSCSI: failed to unmarshall inquiry datain blob");
> + ret = -EINVAL;
> + goto out;
> + }
> + iscsilun->lbpu = inq_lbp->lbpu;
> + iscsilun->lbpws = inq_lbp->lbpws;
> + iscsilun->lbpws10 = inq_lbp->lbpws10;
> + }
> +
> #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
> /* Set up a timer for sending out iSCSI NOPs */
> iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 2/8] iscsi: add bdrv_co_is_allocated
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 1/8] iscsi: add logical block provisioning information to iscsilun Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:36 ` Paolo Bonzini
2013-06-22 20:58 ` [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes Peter Lieven
` (5 subsequent siblings)
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 103 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index a38a1bf..98be71e 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,15 @@ typedef struct IscsiLun {
uint8_t lbpws10;
} IscsiLun;
+typedef struct IscsiTask {
+ IscsiLun *iscsilun;
+ BlockDriverState *bs;
+ int status;
+ int complete;
+ Coroutine *co;
+ struct scsi_lba_status_descriptor lbasd;
+} IscsiTask;
+
typedef struct IscsiAIOCB {
BlockDriverAIOCB common;
QEMUIOVector *qiov;
@@ -805,6 +814,98 @@ iscsi_getlength(BlockDriverState *bs)
return len;
}
+static void
+iscsi_co_is_allocated_cb(struct iscsi_context *iscsi, int status,
+ void *command_data, void *opaque)
+{
+ struct IscsiTask *iTask = opaque;
+ struct scsi_task *task = command_data;
+ struct scsi_get_lba_status *lbas = NULL;
+
+ iTask->complete = 1;
+
+ if (status != 0) {
+ error_report("iSCSI: Failed to get_lba_status on iSCSI lun. %s",
+ iscsi_get_error(iscsi));
+ iTask->status = 1;
+ goto out;
+ }
+
+ lbas = scsi_datain_unmarshall(task);
+ if (lbas == NULL) {
+ iTask->status = 1;
+ goto out;
+ }
+
+ memcpy(&iTask->lbasd, &lbas->descriptors[0],
+ sizeof(struct scsi_lba_status_descriptor));
+
+ iTask->status = 0;
+
+out:
+ scsi_free_scsi_task(task);
+
+ if (iTask->co) {
+ qemu_coroutine_enter(iTask->co, NULL);
+ }
+}
+
+static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct IscsiTask iTask;
+ int ret;
+
+ *pnum = nb_sectors;
+
+ if (iscsilun->lbpme == 0) {
+ return 1;
+ }
+
+ iTask.iscsilun = iscsilun;
+ iTask.status = 0;
+ iTask.complete = 0;
+ iTask.bs = bs;
+
+ if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
+ sector_qemu2lun(sector_num, iscsilun),
+ 8 + 16, iscsi_co_is_allocated_cb,
+ &iTask) == NULL) {
+ *pnum = 0;
+ return 0;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ iTask.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.status != 0) {
+ /* in case the get_lba_status_callout fails (i.e.
+ * because the device is busy or the cmd is not
+ * supported) we pretend all blocks are allocated
+ * for backwards compatiblity */
+ return 1;
+ }
+
+ if (sector_qemu2lun(sector_num, iscsilun) != iTask.lbasd.lba) {
+ *pnum = 0;
+ return 0;
+ }
+
+ *pnum = iTask.lbasd.num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
+ if (*pnum > nb_sectors) {
+ *pnum = nb_sectors;
+ }
+
+ return (iTask.lbasd.provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;
+
+ return ret;
+}
+
static int parse_chap(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
@@ -1315,6 +1416,8 @@ static BlockDriver bdrv_iscsi = {
.bdrv_getlength = iscsi_getlength,
.bdrv_truncate = iscsi_truncate,
+ .bdrv_co_is_allocated = iscsi_co_is_allocated,
+
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
.bdrv_aio_flush = iscsi_aio_flush,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 2/8] iscsi: add bdrv_co_is_allocated
2013-06-22 20:58 ` [Qemu-devel] [PATCH 2/8] iscsi: add bdrv_co_is_allocated Peter Lieven
@ 2013-06-24 14:36 ` Paolo Bonzini
0 siblings, 0 replies; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:36 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/iscsi.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 103 insertions(+)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index a38a1bf..98be71e 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -56,6 +56,15 @@ typedef struct IscsiLun {
> uint8_t lbpws10;
> } IscsiLun;
>
> +typedef struct IscsiTask {
> + IscsiLun *iscsilun;
> + BlockDriverState *bs;
> + int status;
> + int complete;
> + Coroutine *co;
> + struct scsi_lba_status_descriptor lbasd;
> +} IscsiTask;
> +
> typedef struct IscsiAIOCB {
> BlockDriverAIOCB common;
> QEMUIOVector *qiov;
> @@ -805,6 +814,98 @@ iscsi_getlength(BlockDriverState *bs)
> return len;
> }
>
> +static void
> +iscsi_co_is_allocated_cb(struct iscsi_context *iscsi, int status,
> + void *command_data, void *opaque)
> +{
> + struct IscsiTask *iTask = opaque;
> + struct scsi_task *task = command_data;
> + struct scsi_get_lba_status *lbas = NULL;
> +
> + iTask->complete = 1;
> +
> + if (status != 0) {
> + error_report("iSCSI: Failed to get_lba_status on iSCSI lun. %s",
> + iscsi_get_error(iscsi));
> + iTask->status = 1;
> + goto out;
> + }
> +
> + lbas = scsi_datain_unmarshall(task);
> + if (lbas == NULL) {
> + iTask->status = 1;
> + goto out;
> + }
> +
> + memcpy(&iTask->lbasd, &lbas->descriptors[0],
> + sizeof(struct scsi_lba_status_descriptor));
> +
> + iTask->status = 0;
> +
> +out:
> + scsi_free_scsi_task(task);
> +
> + if (iTask->co) {
> + qemu_coroutine_enter(iTask->co, NULL);
> + }
> +}
> +
> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> + int64_t sector_num,
> + int nb_sectors, int *pnum)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> + struct IscsiTask iTask;
> + int ret;
> +
> + *pnum = nb_sectors;
> +
> + if (iscsilun->lbpme == 0) {
> + return 1;
> + }
> +
> + iTask.iscsilun = iscsilun;
> + iTask.status = 0;
> + iTask.complete = 0;
> + iTask.bs = bs;
> +
> + if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
> + sector_qemu2lun(sector_num, iscsilun),
> + 8 + 16, iscsi_co_is_allocated_cb,
> + &iTask) == NULL) {
> + *pnum = 0;
> + return 0;
> + }
> +
> + while (!iTask.complete) {
> + iscsi_set_events(iscsilun);
> + iTask.co = qemu_coroutine_self();
> + qemu_coroutine_yield();
> + }
> +
> + if (iTask.status != 0) {
> + /* in case the get_lba_status_callout fails (i.e.
> + * because the device is busy or the cmd is not
> + * supported) we pretend all blocks are allocated
> + * for backwards compatiblity */
> + return 1;
> + }
> +
> + if (sector_qemu2lun(sector_num, iscsilun) != iTask.lbasd.lba) {
> + *pnum = 0;
> + return 0;
> + }
> +
> + *pnum = iTask.lbasd.num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
> + if (*pnum > nb_sectors) {
> + *pnum = nb_sectors;
> + }
> +
> + return (iTask.lbasd.provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;
> +
> + return ret;
> +}
> +
> static int parse_chap(struct iscsi_context *iscsi, const char *target)
> {
> QemuOptsList *list;
> @@ -1315,6 +1416,8 @@ static BlockDriver bdrv_iscsi = {
> .bdrv_getlength = iscsi_getlength,
> .bdrv_truncate = iscsi_truncate,
>
> + .bdrv_co_is_allocated = iscsi_co_is_allocated,
> +
> .bdrv_aio_readv = iscsi_aio_readv,
> .bdrv_aio_writev = iscsi_aio_writev,
> .bdrv_aio_flush = iscsi_aio_flush,
>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 1/8] iscsi: add logical block provisioning information to iscsilun Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 2/8] iscsi: add bdrv_co_is_allocated Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:34 ` Paolo Bonzini
2013-06-22 20:58 ` [Qemu-devel] [PATCH 4/8] block: add bdrv_write_zeroes() Peter Lieven
` (4 subsequent siblings)
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
write zeroes is emulated by unmap if the
target supports unmapping an unmapped blocks
read as zero.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 98be71e..683692a 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -906,6 +906,68 @@ static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
return ret;
}
+static void
+iscsi_co_write_zeroes_cb(struct iscsi_context *iscsi, int status,
+ void *command_data, void *opaque)
+{
+ struct IscsiTask *iTask = opaque;
+ struct scsi_task *task = command_data;
+
+ iTask->complete = 1;
+ iTask->status = 0;
+
+ if (status != 0) {
+ error_report("iSCSI: Failed to unmap data on iSCSI lun. %s",
+ iscsi_get_error(iscsi));
+ iTask->status = 1;
+ }
+
+ scsi_free_scsi_task(task);
+
+ if (iTask->co) {
+ qemu_coroutine_enter(iTask->co, NULL);
+ }
+}
+
+static int
+coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct IscsiTask iTask;
+ struct unmap_list list[1];
+
+ if (!iscsilun->lbprz || !iscsilun->lbpu) {
+ /* fall back to writev */
+ return -ENOTSUP;
+ }
+
+ iTask.iscsilun = iscsilun;
+ iTask.status = 0;
+ iTask.complete = 0;
+ iTask.bs = bs;
+
+ list[0].lba = sector_qemu2lun(sector_num, iscsilun);
+ list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
+
+ if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list[0], 1,
+ iscsi_co_write_zeroes_cb, &iTask) == NULL) {
+ return -EIO;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ iTask.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.status != 0) {
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int parse_chap(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
@@ -1417,6 +1479,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_truncate = iscsi_truncate,
.bdrv_co_is_allocated = iscsi_co_is_allocated,
+ .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes
2013-06-22 20:58 ` [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes Peter Lieven
@ 2013-06-24 14:34 ` Paolo Bonzini
2013-06-24 16:31 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:34 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> write zeroes is emulated by unmap if the
> target supports unmapping an unmapped blocks
> read as zero.
I think you should check BDRV_O_UNMAP in bs->open_flags, and not do this
optimization if it is not set.
Paolo
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/iscsi.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 63 insertions(+)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 98be71e..683692a 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -906,6 +906,68 @@ static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> return ret;
> }
>
> +static void
> +iscsi_co_write_zeroes_cb(struct iscsi_context *iscsi, int status,
> + void *command_data, void *opaque)
> +{
> + struct IscsiTask *iTask = opaque;
> + struct scsi_task *task = command_data;
> +
> + iTask->complete = 1;
> + iTask->status = 0;
> +
> + if (status != 0) {
> + error_report("iSCSI: Failed to unmap data on iSCSI lun. %s",
> + iscsi_get_error(iscsi));
> + iTask->status = 1;
> + }
> +
> + scsi_free_scsi_task(task);
> +
> + if (iTask->co) {
> + qemu_coroutine_enter(iTask->co, NULL);
> + }
> +}
> +
> +static int
> +coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
> + int nb_sectors)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> + struct IscsiTask iTask;
> + struct unmap_list list[1];
> +
> + if (!iscsilun->lbprz || !iscsilun->lbpu) {
> + /* fall back to writev */
> + return -ENOTSUP;
> + }
> +
> + iTask.iscsilun = iscsilun;
> + iTask.status = 0;
> + iTask.complete = 0;
> + iTask.bs = bs;
> +
> + list[0].lba = sector_qemu2lun(sector_num, iscsilun);
> + list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
> +
> + if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list[0], 1,
> + iscsi_co_write_zeroes_cb, &iTask) == NULL) {
> + return -EIO;
> + }
> +
> + while (!iTask.complete) {
> + iscsi_set_events(iscsilun);
> + iTask.co = qemu_coroutine_self();
> + qemu_coroutine_yield();
> + }
> +
> + if (iTask.status != 0) {
> + return -EIO;
> + }
> +
> + return 0;
> +}
> +
> static int parse_chap(struct iscsi_context *iscsi, const char *target)
> {
> QemuOptsList *list;
> @@ -1417,6 +1479,7 @@ static BlockDriver bdrv_iscsi = {
> .bdrv_truncate = iscsi_truncate,
>
> .bdrv_co_is_allocated = iscsi_co_is_allocated,
> + .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
>
> .bdrv_aio_readv = iscsi_aio_readv,
> .bdrv_aio_writev = iscsi_aio_writev,
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes
2013-06-24 14:34 ` Paolo Bonzini
@ 2013-06-24 16:31 ` Peter Lieven
0 siblings, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:31 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 16:34, schrieb Paolo Bonzini:
> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>> write zeroes is emulated by unmap if the
>> target supports unmapping an unmapped blocks
>> read as zero.
> I think you should check BDRV_O_UNMAP in bs->open_flags, and not do this
> optimization if it is not set.
Ok, I will change this. I will also patch iSCSI to return 0 if bdrv_co_discard
is called and the target does not support unmap.
In this case its save to set discard=on for iSCSI.
Peter
>
> Paolo
>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/iscsi.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 63 insertions(+)
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index 98be71e..683692a 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -906,6 +906,68 @@ static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>> return ret;
>> }
>>
>> +static void
>> +iscsi_co_write_zeroes_cb(struct iscsi_context *iscsi, int status,
>> + void *command_data, void *opaque)
>> +{
>> + struct IscsiTask *iTask = opaque;
>> + struct scsi_task *task = command_data;
>> +
>> + iTask->complete = 1;
>> + iTask->status = 0;
>> +
>> + if (status != 0) {
>> + error_report("iSCSI: Failed to unmap data on iSCSI lun. %s",
>> + iscsi_get_error(iscsi));
>> + iTask->status = 1;
>> + }
>> +
>> + scsi_free_scsi_task(task);
>> +
>> + if (iTask->co) {
>> + qemu_coroutine_enter(iTask->co, NULL);
>> + }
>> +}
>> +
>> +static int
>> +coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
>> + int nb_sectors)
>> +{
>> + IscsiLun *iscsilun = bs->opaque;
>> + struct IscsiTask iTask;
>> + struct unmap_list list[1];
>> +
>> + if (!iscsilun->lbprz || !iscsilun->lbpu) {
>> + /* fall back to writev */
>> + return -ENOTSUP;
>> + }
>> +
>> + iTask.iscsilun = iscsilun;
>> + iTask.status = 0;
>> + iTask.complete = 0;
>> + iTask.bs = bs;
>> +
>> + list[0].lba = sector_qemu2lun(sector_num, iscsilun);
>> + list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
>> +
>> + if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list[0], 1,
>> + iscsi_co_write_zeroes_cb, &iTask) == NULL) {
>> + return -EIO;
>> + }
>> +
>> + while (!iTask.complete) {
>> + iscsi_set_events(iscsilun);
>> + iTask.co = qemu_coroutine_self();
>> + qemu_coroutine_yield();
>> + }
>> +
>> + if (iTask.status != 0) {
>> + return -EIO;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int parse_chap(struct iscsi_context *iscsi, const char *target)
>> {
>> QemuOptsList *list;
>> @@ -1417,6 +1479,7 @@ static BlockDriver bdrv_iscsi = {
>> .bdrv_truncate = iscsi_truncate,
>>
>> .bdrv_co_is_allocated = iscsi_co_is_allocated,
>> + .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
>>
>> .bdrv_aio_readv = iscsi_aio_readv,
>> .bdrv_aio_writev = iscsi_aio_writev,
>>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 4/8] block: add bdrv_write_zeroes()
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
` (2 preceding siblings ...)
2013-06-22 20:58 ` [Qemu-devel] [PATCH 3/8] iscsi: add bdrv_co_write_zeroes Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 5/8] block/raw: add bdrv_co_write_zeroes Peter Lieven
` (3 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 27 +++++++++++++++++++--------
include/block/block.h | 2 ++
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/block.c b/block.c
index b88ad2f..e32b8cf 100644
--- a/block.c
+++ b/block.c
@@ -2163,6 +2163,7 @@ typedef struct RwCo {
QEMUIOVector *qiov;
bool is_write;
int ret;
+ BdrvRequestFlags flags;
} RwCo;
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@@ -2171,10 +2172,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
if (!rwco->is_write) {
rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
} else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
}
}
@@ -2182,7 +2185,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
* Process a vectored synchronous request using coroutines
*/
static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, bool is_write)
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
@@ -2192,6 +2196,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
.qiov = qiov,
.is_write = is_write,
.ret = NOT_DONE,
+ .flags = flags,
};
assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
@@ -2223,7 +2228,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
* Process a synchronous request using coroutines
*/
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
+ int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
struct iovec iov = {
@@ -2232,14 +2237,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
};
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+ return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@@ -2265,12 +2270,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+ return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
{
- return bdrv_rwv_co(bs, sector_num, qiov, true);
+ return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+ BDRV_REQ_ZERO_WRITE);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
diff --git a/include/block/block.h b/include/block/block.h
index 2307f67..5a062fa 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -156,6 +156,8 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 5/8] block/raw: add bdrv_co_write_zeroes
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
` (3 preceding siblings ...)
2013-06-22 20:58 ` [Qemu-devel] [PATCH 4/8] block: add bdrv_write_zeroes() Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes Peter Lieven
` (2 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/raw.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/block/raw.c b/block/raw.c
index ce10422..8c81de9 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -42,6 +42,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
+static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
+}
+
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@@ -128,6 +135,7 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
+ .bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
` (4 preceding siblings ...)
2013-06-22 20:58 ` [Qemu-devel] [PATCH 5/8] block/raw: add bdrv_co_write_zeroes Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:33 ` Paolo Bonzini
2013-06-22 20:58 ` [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize Peter Lieven
2013-06-22 20:58 ` [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks Peter Lieven
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
Signed-off-by: Peter Lieven <pl@kamp.de>
---
qemu-img.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 809b4f1..5aa53ab 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
If the output is to a host device, we also write out
sectors that are entirely 0, since whatever data was
already there is garbage, not 0s. */
- if (!has_zero_init || out_baseimg ||
- is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
- ret = bdrv_write(out_bs, sector_num, buf1, n1);
+ int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
+ if (!has_zero_init || out_baseimg || allocated) {
+ if (allocated || out_baseimg) {
+ ret = bdrv_write(out_bs, sector_num, buf1, n1);
+ } else {
+ ret = bdrv_write_zeroes(out_bs, sector_num, n1);
+ }
if (ret < 0) {
error_report("error while writing sector %" PRId64
": %s", sector_num, strerror(-ret));
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-22 20:58 ` [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes Peter Lieven
@ 2013-06-24 14:33 ` Paolo Bonzini
2013-06-24 16:17 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:33 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> qemu-img.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/qemu-img.c b/qemu-img.c
> index 809b4f1..5aa53ab 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
> If the output is to a host device, we also write out
> sectors that are entirely 0, since whatever data was
> already there is garbage, not 0s. */
> - if (!has_zero_init || out_baseimg ||
> - is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
> - ret = bdrv_write(out_bs, sector_num, buf1, n1);
> + int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
> + if (!has_zero_init || out_baseimg || allocated) {
> + if (allocated || out_baseimg) {
> + ret = bdrv_write(out_bs, sector_num, buf1, n1);
> + } else {
> + ret = bdrv_write_zeroes(out_bs, sector_num, n1);
I think it should still do the write only if !has_zero_init.
Paolo
> + }
> if (ret < 0) {
> error_report("error while writing sector %" PRId64
> ": %s", sector_num, strerror(-ret));
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-24 14:33 ` Paolo Bonzini
@ 2013-06-24 16:17 ` Peter Lieven
2013-06-24 16:25 ` Paolo Bonzini
0 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:17 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 16:33, schrieb Paolo Bonzini:
> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> qemu-img.c | 10 +++++++---
>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 809b4f1..5aa53ab 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
>> If the output is to a host device, we also write out
>> sectors that are entirely 0, since whatever data was
>> already there is garbage, not 0s. */
>> - if (!has_zero_init || out_baseimg ||
>> - is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
>> - ret = bdrv_write(out_bs, sector_num, buf1, n1);
>> + int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
>> + if (!has_zero_init || out_baseimg || allocated) {
>> + if (allocated || out_baseimg) {
>> + ret = bdrv_write(out_bs, sector_num, buf1, n1);
>> + } else {
>> + ret = bdrv_write_zeroes(out_bs, sector_num, n1);
> I think it should still do the write only if !has_zero_init.
With this I am still in trouble with iSCSI, but Kevin pointed out another
possible solution for the allocating everything problem.
a) let iscsi_create discard the whole device if lbpz && lbprz
b) return 1 for has_zero_init if lbprz.
What do you think?
Peter
>
> Paolo
>
>> + }
>> if (ret < 0) {
>> error_report("error while writing sector %" PRId64
>> ": %s", sector_num, strerror(-ret));
>>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-24 16:17 ` Peter Lieven
@ 2013-06-24 16:25 ` Paolo Bonzini
2013-06-24 16:33 ` Peter Lieven
2013-06-24 18:46 ` Peter Lieven
0 siblings, 2 replies; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 16:25 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 24/06/2013 18:17, Peter Lieven ha scritto:
> Am 24.06.2013 16:33, schrieb Paolo Bonzini:
>> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>> qemu-img.c | 10 +++++++---
>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/qemu-img.c b/qemu-img.c
>>> index 809b4f1..5aa53ab 100644
>>> --- a/qemu-img.c
>>> +++ b/qemu-img.c
>>> @@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
>>> If the output is to a host device, we also write out
>>> sectors that are entirely 0, since whatever data was
>>> already there is garbage, not 0s. */
>>> - if (!has_zero_init || out_baseimg ||
>>> - is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
>>> - ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>> + int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
>>> + if (!has_zero_init || out_baseimg || allocated) {
>>> + if (allocated || out_baseimg) {
>>> + ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>> + } else {
>>> + ret = bdrv_write_zeroes(out_bs, sector_num, n1);
>> I think it should still do the write only if !has_zero_init.
> With this I am still in trouble with iSCSI, but Kevin pointed out another
> possible solution for the allocating everything problem.
>
> a) let iscsi_create discard the whole device if lbpz && lbprz
> b) return 1 for has_zero_init if lbprz.
>
> What do you think?
Fine by me (but the discard may take a looong time! :)).
Paolo
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-24 16:25 ` Paolo Bonzini
@ 2013-06-24 16:33 ` Peter Lieven
2013-06-24 18:46 ` Peter Lieven
1 sibling, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:33 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 18:25, schrieb Paolo Bonzini:
> Il 24/06/2013 18:17, Peter Lieven ha scritto:
>> Am 24.06.2013 16:33, schrieb Paolo Bonzini:
>>> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>> qemu-img.c | 10 +++++++---
>>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/qemu-img.c b/qemu-img.c
>>>> index 809b4f1..5aa53ab 100644
>>>> --- a/qemu-img.c
>>>> +++ b/qemu-img.c
>>>> @@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
>>>> If the output is to a host device, we also write out
>>>> sectors that are entirely 0, since whatever data was
>>>> already there is garbage, not 0s. */
>>>> - if (!has_zero_init || out_baseimg ||
>>>> - is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
>>>> - ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>>> + int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
>>>> + if (!has_zero_init || out_baseimg || allocated) {
>>>> + if (allocated || out_baseimg) {
>>>> + ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>>> + } else {
>>>> + ret = bdrv_write_zeroes(out_bs, sector_num, n1);
>>> I think it should still do the write only if !has_zero_init.
>> With this I am still in trouble with iSCSI, but Kevin pointed out another
>> possible solution for the allocating everything problem.
>>
>> a) let iscsi_create discard the whole device if lbpz && lbprz
>> b) return 1 for has_zero_init if lbprz.
>>
>> What do you think?
> Fine by me (but the discard may take a looong time! :)).
This was also my concern, but if a target advertises logical block
provisioning it should be able to discard blocks fast (and not overwrite all of
them or sth like that).
Peter
>
> Paolo
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes
2013-06-24 16:25 ` Paolo Bonzini
2013-06-24 16:33 ` Peter Lieven
@ 2013-06-24 18:46 ` Peter Lieven
1 sibling, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 18:46 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 18:25, schrieb Paolo Bonzini:
> Il 24/06/2013 18:17, Peter Lieven ha scritto:
>> Am 24.06.2013 16:33, schrieb Paolo Bonzini:
>>> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>> qemu-img.c | 10 +++++++---
>>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/qemu-img.c b/qemu-img.c
>>>> index 809b4f1..5aa53ab 100644
>>>> --- a/qemu-img.c
>>>> +++ b/qemu-img.c
>>>> @@ -1513,9 +1513,13 @@ static int img_convert(int argc, char **argv)
>>>> If the output is to a host device, we also write out
>>>> sectors that are entirely 0, since whatever data was
>>>> already there is garbage, not 0s. */
>>>> - if (!has_zero_init || out_baseimg ||
>>>> - is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
>>>> - ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>>> + int allocated = is_allocated_sectors_min(buf1, n, &n1, min_sparse);
>>>> + if (!has_zero_init || out_baseimg || allocated) {
>>>> + if (allocated || out_baseimg) {
>>>> + ret = bdrv_write(out_bs, sector_num, buf1, n1);
>>>> + } else {
>>>> + ret = bdrv_write_zeroes(out_bs, sector_num, n1);
>>> I think it should still do the write only if !has_zero_init.
>> With this I am still in trouble with iSCSI, but Kevin pointed out another
>> possible solution for the allocating everything problem.
>>
>> a) let iscsi_create discard the whole device if lbpz && lbprz
>> b) return 1 for has_zero_init if lbprz.
>>
>> What do you think?
> Fine by me (but the discard may take a looong time! :)).
fyi: i have created an optimized unmap function which transmits the
maximum allowed unmap_list length (read from block_limits page).
for 1TB it takes approx. 1 minute on my storage.
i will include this in v2 of the series and also add the optimization
to the other unmap calls (which currently do not check max_unmap
size at all).
Peter
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
` (5 preceding siblings ...)
2013-06-22 20:58 ` [Qemu-devel] [PATCH 6/8] qemu-img: use bdrv_write_zeroes to write zeroes Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:30 ` Paolo Bonzini
2013-06-22 20:58 ` [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks Peter Lieven
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
if the blocksize of an iSCSI LUN is bigger than the BDRV_SECTOR_SIZE
it is possible that sector_num or nb_sectors are not correctly
alligned.
for now assert that there is no misalignment to avoid data
corruption.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 683692a..2c410b1 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -249,6 +249,7 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
{
+ assert((sector * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
@@ -269,6 +270,8 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
acb->status = -EINPROGRESS;
acb->buf = NULL;
+ assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
+
/* this will allow us to get rid of 'buf' completely */
size = acb->nb_sectors * BDRV_SECTOR_SIZE;
@@ -618,6 +621,8 @@ static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
acb->buf = NULL;
list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
+
+ assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
@@ -947,6 +952,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
iTask.complete = 0;
iTask.bs = bs;
+ assert((nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
+
list[0].lba = sector_qemu2lun(sector_num, iscsilun);
list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-22 20:58 ` [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize Peter Lieven
@ 2013-06-24 14:30 ` Paolo Bonzini
2013-06-24 16:10 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:30 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> if the blocksize of an iSCSI LUN is bigger than the BDRV_SECTOR_SIZE
> it is possible that sector_num or nb_sectors are not correctly
> alligned.
>
> for now assert that there is no misalignment to avoid data
> corruption.
You should just fail to open the LUN instead.
Paolo
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/iscsi.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 683692a..2c410b1 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -249,6 +249,7 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
>
> static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
> {
> + assert((sector * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
> return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
> }
>
> @@ -269,6 +270,8 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
> acb->status = -EINPROGRESS;
> acb->buf = NULL;
>
> + assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
> +
> /* this will allow us to get rid of 'buf' completely */
> size = acb->nb_sectors * BDRV_SECTOR_SIZE;
>
> @@ -618,6 +621,8 @@ static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
> acb->buf = NULL;
>
> list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
> +
> + assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
> list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
>
> acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
> @@ -947,6 +952,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
> iTask.complete = 0;
> iTask.bs = bs;
>
> + assert((nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
> +
> list[0].lba = sector_qemu2lun(sector_num, iscsilun);
> list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-24 14:30 ` Paolo Bonzini
@ 2013-06-24 16:10 ` Peter Lieven
2013-06-24 16:13 ` Paolo Bonzini
0 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:10 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 16:30, schrieb Paolo Bonzini:
> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>> if the blocksize of an iSCSI LUN is bigger than the BDRV_SECTOR_SIZE
>> it is possible that sector_num or nb_sectors are not correctly
>> alligned.
>>
>> for now assert that there is no misalignment to avoid data
>> corruption.
> You should just fail to open the LUN instead.
Ronnie added support for reading fragments of blocksize if the
offset is aligned. I don't know what the idea was, maybe for qemu
reading the boot sector. If the OS does only read multiple of
blocksize at aligned offsets everything should work. I now that
our storages support 4K blocksize. I can check if it is usable.
I could also just fail the operations instead of asserting.
Peter
>
> Paolo
>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/iscsi.c | 7 +++++++
>> 1 file changed, 7 insertions(+)
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index 683692a..2c410b1 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -249,6 +249,7 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
>>
>> static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
>> {
>> + assert((sector * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
>> return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
>> }
>>
>> @@ -269,6 +270,8 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
>> acb->status = -EINPROGRESS;
>> acb->buf = NULL;
>>
>> + assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
>> +
>> /* this will allow us to get rid of 'buf' completely */
>> size = acb->nb_sectors * BDRV_SECTOR_SIZE;
>>
>> @@ -618,6 +621,8 @@ static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
>> acb->buf = NULL;
>>
>> list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
>> +
>> + assert((acb->nb_sectors * BDRV_SECTOR_SIZE) % acb->iscsilun->block_size == 0);
>> list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
>>
>> acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
>> @@ -947,6 +952,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
>> iTask.complete = 0;
>> iTask.bs = bs;
>>
>> + assert((nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size == 0);
>> +
>> list[0].lba = sector_qemu2lun(sector_num, iscsilun);
>> list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
>>
>>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-24 16:10 ` Peter Lieven
@ 2013-06-24 16:13 ` Paolo Bonzini
2013-06-24 16:24 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 16:13 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 24/06/2013 18:10, Peter Lieven ha scritto:
> Am 24.06.2013 16:30, schrieb Paolo Bonzini:
>> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>>> if the blocksize of an iSCSI LUN is bigger than the BDRV_SECTOR_SIZE
>>> it is possible that sector_num or nb_sectors are not correctly
>>> alligned.
>>>
>>> for now assert that there is no misalignment to avoid data
>>> corruption.
>> You should just fail to open the LUN instead.
> Ronnie added support for reading fragments of blocksize if the
> offset is aligned. I don't know what the idea was, maybe for qemu
> reading the boot sector. If the OS does only read multiple of
> blocksize at aligned offsets everything should work. I now that
> our storages support 4K blocksize. I can check if it is usable.
> I could also just fail the operations instead of asserting.
So far, 4K blocksize is usable if you also specify the same block size
for the guest device. I have posted once the patches to do
read-modify-write, but I never really pursued inclusion of those.
In any case, the right place to fix this is the block layer;
driver-specific hacks are... hacks. :)
Paolo
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-24 16:13 ` Paolo Bonzini
@ 2013-06-24 16:24 ` Peter Lieven
2013-06-24 16:27 ` Paolo Bonzini
0 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:24 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 18:13, schrieb Paolo Bonzini:
> Il 24/06/2013 18:10, Peter Lieven ha scritto:
>> Am 24.06.2013 16:30, schrieb Paolo Bonzini:
>>> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>>>> if the blocksize of an iSCSI LUN is bigger than the BDRV_SECTOR_SIZE
>>>> it is possible that sector_num or nb_sectors are not correctly
>>>> alligned.
>>>>
>>>> for now assert that there is no misalignment to avoid data
>>>> corruption.
>>> You should just fail to open the LUN instead.
>> Ronnie added support for reading fragments of blocksize if the
>> offset is aligned. I don't know what the idea was, maybe for qemu
>> reading the boot sector. If the OS does only read multiple of
>> blocksize at aligned offsets everything should work. I now that
>> our storages support 4K blocksize. I can check if it is usable.
>> I could also just fail the operations instead of asserting.
> So far, 4K blocksize is usable if you also specify the same block size
> for the guest device. I have posted once the patches to do
> read-modify-write, but I never really pursued inclusion of those.
>
> In any case, the right place to fix this is the block layer;
> driver-specific hacks are... hacks. :)
Where do I find the sector size if not in BDRV_SECTOR_SIZE?
Is there a dynamic field or is the answer qemu only support 512 Byte
sector size at the moment?
So you would go for fail to open a device if the LUN blocksize is
not equal to BDRV_SECTOR_SIZE?
Peter
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-24 16:24 ` Peter Lieven
@ 2013-06-24 16:27 ` Paolo Bonzini
2013-06-24 16:36 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 16:27 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 24/06/2013 18:24, Peter Lieven ha scritto:
>> > So far, 4K blocksize is usable if you also specify the same block size
>> > for the guest device. I have posted once the patches to do
>> > read-modify-write, but I never really pursued inclusion of those.
>> >
>> > In any case, the right place to fix this is the block layer;
>> > driver-specific hacks are... hacks. :)
> Where do I find the sector size if not in BDRV_SECTOR_SIZE?
> Is there a dynamic field or is the answer qemu only support 512 Byte
> sector size at the moment?
bdrv_read/write and friends are always in 512-byte units.
There is bs->buffer_alignment for the guest sector size, but it is not
set at bdrv_open, only later. So I think QEMU support for bigger
sectors is too sparse to be useful.
> So you would go for fail to open a device if the LUN blocksize is
> not equal to BDRV_SECTOR_SIZE?
Either that, or fails reads/writes that are not aligned.
Paolo
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize
2013-06-24 16:27 ` Paolo Bonzini
@ 2013-06-24 16:36 ` Peter Lieven
0 siblings, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:36 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 18:27, schrieb Paolo Bonzini:
> Il 24/06/2013 18:24, Peter Lieven ha scritto:
>>>> So far, 4K blocksize is usable if you also specify the same block size
>>>> for the guest device. I have posted once the patches to do
>>>> read-modify-write, but I never really pursued inclusion of those.
>>>>
>>>> In any case, the right place to fix this is the block layer;
>>>> driver-specific hacks are... hacks. :)
>> Where do I find the sector size if not in BDRV_SECTOR_SIZE?
>> Is there a dynamic field or is the answer qemu only support 512 Byte
>> sector size at the moment?
> bdrv_read/write and friends are always in 512-byte units.
>
> There is bs->buffer_alignment for the guest sector size, but it is not
> set at bdrv_open, only later. So I think QEMU support for bigger
> sectors is too sparse to be useful.
>
>> So you would go for fail to open a device if the LUN blocksize is
>> not equal to BDRV_SECTOR_SIZE?
> Either that, or fails reads/writes that are not aligned.
It would be the easiest. Maybe Ronnie can point out what the hack
with the acb->buffer_offset in aio_read was meant for. All other
functions have the requirement to be properly aligned regarding
to the xfersize. I am also not sure if the e.g. reading of 512 byte
at sector 0 from a 4K block device does still work with the
iovector patches we added earlier.
Peter
>
> Paolo
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks
2013-06-22 20:58 [Qemu-devel] [PATCH 0/8] iscsi/qemu-img/block-migration enhancements Peter Lieven
` (6 preceding siblings ...)
2013-06-22 20:58 ` [Qemu-devel] [PATCH 7/8] iscsi: assert that sectors are aligned to LUN blocksize Peter Lieven
@ 2013-06-22 20:58 ` Peter Lieven
2013-06-24 14:32 ` Paolo Bonzini
7 siblings, 1 reply; 26+ messages in thread
From: Peter Lieven @ 2013-06-22 20:58 UTC (permalink / raw)
To: qemu-devel
Cc: Kevin Wolf, pbonzini, Peter Lieven, ronniesahlberg,
Stefan Hajnoczi
this patch adds a efficient encoding for zero blocks by
adding a new flag indiciating a block is completly zero.
additionally bdrv_write_zeros() is used at the destination
to efficiently write these zeroes. if the driver supports
it this avoids blindly allocating all sectors consumed by
zero blocks effectively re-thinning the device.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block-migration.c | 29 +++++++++++++++++++++++------
include/migration/qemu-file.h | 1 +
savevm.c | 2 +-
3 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/block-migration.c b/block-migration.c
index 2fd7699..99b3757 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -29,6 +29,7 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
+#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536
@@ -114,16 +115,29 @@ static void blk_mig_unlock(void)
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
+ int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
+
+ if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+ flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+ }
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
- | BLK_MIG_FLAG_DEVICE_BLOCK);
+ | flags);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration */
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ qemu_fflush(f);
+ return;
+ }
+
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
@@ -762,12 +776,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
- buf = g_malloc(BLOCK_SIZE);
-
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- ret = bdrv_write(bs, addr, buf, nr_sectors);
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ } else {
+ buf = g_malloc(BLOCK_SIZE);
+ qemu_get_buffer(f, buf, BLOCK_SIZE);
+ ret = bdrv_write(bs, addr, buf, nr_sectors);
+ g_free(buf);
+ }
- g_free(buf);
if (ret < 0) {
return ret;
}
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 7519464..b73298d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -71,6 +71,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode);
QEMUFile *qemu_fopen_socket(int fd, const char *mode);
QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
int qemu_get_fd(QEMUFile *f);
+void qemu_fflush(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
int64_t qemu_ftell(QEMUFile *f);
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
diff --git a/savevm.c b/savevm.c
index ff5ece6..4d12d92 100644
--- a/savevm.c
+++ b/savevm.c
@@ -610,7 +610,7 @@ static inline bool qemu_file_is_writable(QEMUFile *f)
* If there is writev_buffer QEMUFileOps it uses it otherwise uses
* put_buffer ops.
*/
-static void qemu_fflush(QEMUFile *f)
+void qemu_fflush(QEMUFile *f)
{
ssize_t ret = 0;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks
2013-06-22 20:58 ` [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks Peter Lieven
@ 2013-06-24 14:32 ` Paolo Bonzini
2013-06-24 16:14 ` Peter Lieven
0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2013-06-24 14:32 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> this patch adds a efficient encoding for zero blocks by
> adding a new flag indiciating a block is completly zero.
>
> additionally bdrv_write_zeros() is used at the destination
> to efficiently write these zeroes. if the driver supports
> it this avoids blindly allocating all sectors consumed by
> zero blocks effectively re-thinning the device.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
This is a bit ugly because it doesn't work with drive-mirror. Perhaps
we can add a write-zeroes opcode to NBD, too.
Paolo
> ---
> block-migration.c | 29 +++++++++++++++++++++++------
> include/migration/qemu-file.h | 1 +
> savevm.c | 2 +-
> 3 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/block-migration.c b/block-migration.c
> index 2fd7699..99b3757 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -29,6 +29,7 @@
> #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
> #define BLK_MIG_FLAG_EOS 0x02
> #define BLK_MIG_FLAG_PROGRESS 0x04
> +#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
>
> #define MAX_IS_ALLOCATED_SEARCH 65536
>
> @@ -114,16 +115,29 @@ static void blk_mig_unlock(void)
> static void blk_send(QEMUFile *f, BlkMigBlock * blk)
> {
> int len;
> + int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
> +
> + if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
> + flags |= BLK_MIG_FLAG_ZERO_BLOCK;
> + }
>
> /* sector number and flags */
> qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
> - | BLK_MIG_FLAG_DEVICE_BLOCK);
> + | flags);
>
> /* device name */
> len = strlen(blk->bmds->bs->device_name);
> qemu_put_byte(f, len);
> qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
>
> + /* if a block is zero we need to flush here since the network
> + * bandwidth is now a lot higher than the storage device bandwidth.
> + * thus if we queue zero blocks we slow down the migration */
> + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
> + qemu_fflush(f);
> + return;
> + }
> +
> qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
> }
>
> @@ -762,12 +776,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
> nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
> }
>
> - buf = g_malloc(BLOCK_SIZE);
> -
> - qemu_get_buffer(f, buf, BLOCK_SIZE);
> - ret = bdrv_write(bs, addr, buf, nr_sectors);
> + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
> + ret = bdrv_write_zeroes(bs, addr, nr_sectors);
> + } else {
> + buf = g_malloc(BLOCK_SIZE);
> + qemu_get_buffer(f, buf, BLOCK_SIZE);
> + ret = bdrv_write(bs, addr, buf, nr_sectors);
> + g_free(buf);
> + }
>
> - g_free(buf);
> if (ret < 0) {
> return ret;
> }
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index 7519464..b73298d 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -71,6 +71,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode);
> QEMUFile *qemu_fopen_socket(int fd, const char *mode);
> QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
> int qemu_get_fd(QEMUFile *f);
> +void qemu_fflush(QEMUFile *f);
> int qemu_fclose(QEMUFile *f);
> int64_t qemu_ftell(QEMUFile *f);
> void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
> diff --git a/savevm.c b/savevm.c
> index ff5ece6..4d12d92 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -610,7 +610,7 @@ static inline bool qemu_file_is_writable(QEMUFile *f)
> * If there is writev_buffer QEMUFileOps it uses it otherwise uses
> * put_buffer ops.
> */
> -static void qemu_fflush(QEMUFile *f)
> +void qemu_fflush(QEMUFile *f)
> {
> ssize_t ret = 0;
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 8/8] block-migration: efficiently encode zero blocks
2013-06-24 14:32 ` Paolo Bonzini
@ 2013-06-24 16:14 ` Peter Lieven
0 siblings, 0 replies; 26+ messages in thread
From: Peter Lieven @ 2013-06-24 16:14 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, Stefan Hajnoczi
Am 24.06.2013 16:32, schrieb Paolo Bonzini:
> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>> this patch adds a efficient encoding for zero blocks by
>> adding a new flag indiciating a block is completly zero.
>>
>> additionally bdrv_write_zeros() is used at the destination
>> to efficiently write these zeroes. if the driver supports
>> it this avoids blindly allocating all sectors consumed by
>> zero blocks effectively re-thinning the device.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
> This is a bit ugly because it doesn't work with drive-mirror. Perhaps
> we can add a write-zeroes opcode to NBD, too.
That is the exact problem with NBD. If write zeroes is not supported
the routine falls back to write zeros with writev.
Peter
>
> Paolo
>
>> ---
>> block-migration.c | 29 +++++++++++++++++++++++------
>> include/migration/qemu-file.h | 1 +
>> savevm.c | 2 +-
>> 3 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index 2fd7699..99b3757 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -29,6 +29,7 @@
>> #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
>> #define BLK_MIG_FLAG_EOS 0x02
>> #define BLK_MIG_FLAG_PROGRESS 0x04
>> +#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
>>
>> #define MAX_IS_ALLOCATED_SEARCH 65536
>>
>> @@ -114,16 +115,29 @@ static void blk_mig_unlock(void)
>> static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>> {
>> int len;
>> + int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
>> +
>> + if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
>> + flags |= BLK_MIG_FLAG_ZERO_BLOCK;
>> + }
>>
>> /* sector number and flags */
>> qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
>> - | BLK_MIG_FLAG_DEVICE_BLOCK);
>> + | flags);
>>
>> /* device name */
>> len = strlen(blk->bmds->bs->device_name);
>> qemu_put_byte(f, len);
>> qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
>>
>> + /* if a block is zero we need to flush here since the network
>> + * bandwidth is now a lot higher than the storage device bandwidth.
>> + * thus if we queue zero blocks we slow down the migration */
>> + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>> + qemu_fflush(f);
>> + return;
>> + }
>> +
>> qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
>> }
>>
>> @@ -762,12 +776,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>> nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>> }
>>
>> - buf = g_malloc(BLOCK_SIZE);
>> -
>> - qemu_get_buffer(f, buf, BLOCK_SIZE);
>> - ret = bdrv_write(bs, addr, buf, nr_sectors);
>> + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>> + ret = bdrv_write_zeroes(bs, addr, nr_sectors);
>> + } else {
>> + buf = g_malloc(BLOCK_SIZE);
>> + qemu_get_buffer(f, buf, BLOCK_SIZE);
>> + ret = bdrv_write(bs, addr, buf, nr_sectors);
>> + g_free(buf);
>> + }
>>
>> - g_free(buf);
>> if (ret < 0) {
>> return ret;
>> }
>> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
>> index 7519464..b73298d 100644
>> --- a/include/migration/qemu-file.h
>> +++ b/include/migration/qemu-file.h
>> @@ -71,6 +71,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode);
>> QEMUFile *qemu_fopen_socket(int fd, const char *mode);
>> QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
>> int qemu_get_fd(QEMUFile *f);
>> +void qemu_fflush(QEMUFile *f);
>> int qemu_fclose(QEMUFile *f);
>> int64_t qemu_ftell(QEMUFile *f);
>> void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
>> diff --git a/savevm.c b/savevm.c
>> index ff5ece6..4d12d92 100644
>> --- a/savevm.c
>> +++ b/savevm.c
>> @@ -610,7 +610,7 @@ static inline bool qemu_file_is_writable(QEMUFile *f)
>> * If there is writev_buffer QEMUFileOps it uses it otherwise uses
>> * put_buffer ops.
>> */
>> -static void qemu_fflush(QEMUFile *f)
>> +void qemu_fflush(QEMUFile *f)
>> {
>> ssize_t ret = 0;
>>
>>
^ permalink raw reply [flat|nested] 26+ messages in thread