* [Qemu-devel] [PATCHv7 01/17] block: make BdrvRequestFlags public
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 02/17] block: add flags to bdrv_*_write_zeroes Peter Lieven
` (17 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 5 -----
include/block/block.h | 5 +++++
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/block.c b/block.c
index fd05a80..eb11a07 100644
--- a/block.c
+++ b/block.c
@@ -51,11 +51,6 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-} BdrvRequestFlags;
-
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
diff --git a/include/block/block.h b/include/block/block.h
index 3560deb..ba2082c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -62,6 +62,11 @@ typedef struct BlockDevOps {
void (*resize_cb)(void *opaque);
} BlockDevOps;
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+} BdrvRequestFlags;
+
#define BDRV_O_RDWR 0x0002
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 02/17] block: add flags to bdrv_*_write_zeroes
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 01/17] block: make BdrvRequestFlags public Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag Peter Lieven
` (16 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block-migration.c | 2 +-
block.c | 20 +++++++++++---------
block/backup.c | 3 ++-
block/qcow2-cluster.c | 2 +-
block/qcow2.c | 2 +-
block/qed.c | 3 ++-
block/raw_bsd.c | 5 +++--
block/vmdk.c | 3 ++-
include/block/block.h | 4 ++--
include/block/block_int.h | 2 +-
qemu-io-cmds.c | 2 +-
11 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/block-migration.c b/block-migration.c
index daf9ec1..713a8e3 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
}
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
- ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index eb11a07..3259429 100644
--- a/block.c
+++ b/block.c
@@ -79,7 +79,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -2384,10 +2384,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
}
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
{
return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2569,7 +2570,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
if (drv->bdrv_co_write_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors);
+ cluster_nb_sectors, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
@@ -2703,7 +2704,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
}
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
@@ -2715,7 +2716,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
/* First try the efficient write zeroes operation */
if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
if (ret != -ENOTSUP) {
return ret;
}
@@ -2770,7 +2771,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
} else {
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
@@ -2804,12 +2805,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
}
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors,
+ BdrvRequestFlags flags)
{
trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
}
/**
diff --git a/block/backup.c b/block/backup.c
index cad14c9..830a179 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER, n);
+ start * BACKUP_SECTORS_PER_CLUSTER,
+ n, 0);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0348b97..2752396 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1613,7 +1613,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
}
ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
- s->cluster_sectors);
+ s->cluster_sectors, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
diff --git a/block/qcow2.c b/block/qcow2.c
index c1abaff..1beb2e1 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1686,7 +1686,7 @@ static int qcow2_make_empty(BlockDriverState *bs)
}
static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
BDRVQcowState *s = bs->opaque;
diff --git a/block/qed.c b/block/qed.c
index 6c0cba0..adc2736 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1397,7 +1397,8 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors)
+ int nb_sectors,
+ BdrvRequestFlags flags)
{
BlockDriverAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 0078c1b..b0dd23f 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -68,9 +68,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors,
+ BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
+ return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
diff --git a/block/vmdk.c b/block/vmdk.c
index 32ec8b7..9c857cd 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1403,7 +1403,8 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors)
+ int nb_sectors,
+ BdrvRequestFlags flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
diff --git a/include/block/block.h b/include/block/block.h
index ba2082c..8ba9f0c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -192,7 +192,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors);
+ int nb_sectors, BdrvRequestFlags flags);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
@@ -214,7 +214,7 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
* because it may allocate memory for the entire region.
*/
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors);
+ int nb_sectors, BdrvRequestFlags flags);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a48731d..9bbaa29 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -130,7 +130,7 @@ struct BlockDriver {
* instead.
*/
int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 667f4e4..7e9fecb 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -442,7 +442,7 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque)
CoWriteZeroes *data = opaque;
data->ret = bdrv_co_write_zeroes(data->bs, data->offset / BDRV_SECTOR_SIZE,
- data->count / BDRV_SECTOR_SIZE);
+ data->count / BDRV_SECTOR_SIZE, 0);
data->done = true;
if (data->ret < 0) {
*data->total = data->ret;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 01/17] block: make BdrvRequestFlags public Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 02/17] block: add flags to bdrv_*_write_zeroes Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 04/17] block: add logical block provisioning info to BlockDriverInfo Peter Lieven
` (15 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block-migration.c | 3 ++-
block.c | 4 ++++
block/backup.c | 2 +-
include/block/block.h | 7 +++++++
4 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/block-migration.c b/block-migration.c
index 713a8e3..fc4ef93 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
}
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
- ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors,
+ BDRV_REQ_MAY_UNMAP);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index 3259429..0d97ce6 100644
--- a/block.c
+++ b/block.c
@@ -2810,6 +2810,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
{
trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ flags &= ~BDRV_REQ_MAY_UNMAP;
+ }
+
return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
diff --git a/block/backup.c b/block/backup.c
index 830a179..0198514 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
start * BACKUP_SECTORS_PER_CLUSTER,
- n, 0);
+ n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
diff --git a/include/block/block.h b/include/block/block.h
index 8ba9f0c..1f30a56 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -65,6 +65,13 @@ typedef struct BlockDevOps {
typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
BDRV_REQ_ZERO_WRITE = 0x2,
+ /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
+ * is allowed to optimize a write zeroes request by unmapping (discarding)
+ * blocks if it is guaranteed that the result will read back as
+ * zeroes. The flag is only passed to the driver if the block device is
+ * opened with BDRV_O_UNMAP.
+ */
+ BDRV_REQ_MAY_UNMAP = 0x4,
} BdrvRequestFlags;
#define BDRV_O_RDWR 0x0002
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 04/17] block: add logical block provisioning info to BlockDriverInfo
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (2 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 05/17] block: add wrappers for logical block provisioning information Peter Lieven
` (14 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
include/block/block.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/block/block.h b/include/block/block.h
index 1f30a56..9c76967 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -18,6 +18,22 @@ typedef struct BlockDriverInfo {
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
bool is_dirty;
+ /*
+ * True if unallocated blocks read back as zeroes. This is equivalent
+ * to the the LBPRZ flag in the SCSI logical block provisioning page.
+ */
+ bool unallocated_blocks_are_zero;
+ /*
+ * True if the driver can optimize writing zeroes by unmapping
+ * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
+ * with the difference that in qemu a discard is allowed to silently
+ * fail. Therefore we have to use bdrv_write_zeroes with the
+ * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
+ * After this call the driver has to guarantee that the contents read
+ * back as zero. It is additionally required that the block device is
+ * opened with BDRV_O_UNMAP flag for this to work.
+ */
+ bool can_write_zeroes_with_unmap;
} BlockDriverInfo;
typedef struct BlockFragInfo {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 05/17] block: add wrappers for logical block provisioning information
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (3 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 04/17] block: add logical block provisioning info to BlockDriverInfo Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 06/17] block/iscsi: add .bdrv_get_info Peter Lieven
` (13 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
This adds 2 wrappers to read the unallocated_blocks_are_zero and
can_write_zeroes_with_unmap info from the BDI. The wrappers are
required to check for the existence of a backing_hd and
if the devices are opened with the correct flags.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 30 ++++++++++++++++++++++++++++++
include/block/block.h | 2 ++
2 files changed, 32 insertions(+)
diff --git a/block.c b/block.c
index 0d97ce6..0601b02 100644
--- a/block.c
+++ b/block.c
@@ -3094,6 +3094,36 @@ int bdrv_has_zero_init(BlockDriverState *bs)
return 0;
}
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+
+ if (bs->backing_hd) {
+ return false;
+ }
+
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ return bdi.unallocated_blocks_are_zero;
+ }
+
+ return false;
+}
+
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+
+ if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
+ return false;
+ }
+
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ return bdi.can_write_zeroes_with_unmap;
+ }
+
+ return false;
+}
+
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
diff --git a/include/block/block.h b/include/block/block.h
index 9c76967..803c5ca 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 06/17] block/iscsi: add .bdrv_get_info
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (4 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 05/17] block: add wrappers for logical block provisioning information Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 07/17] block: add BlockLimits structure to BlockDriverState Peter Lieven
` (12 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index a2a961e..1dbbcad 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1506,6 +1506,14 @@ out:
return ret;
}
+static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
+ bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
+ return 0;
+}
+
static QEMUOptionParameter iscsi_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1527,6 +1535,7 @@ static BlockDriver bdrv_iscsi = {
.create_options = iscsi_create_options,
.bdrv_getlength = iscsi_getlength,
+ .bdrv_get_info = iscsi_get_info,
.bdrv_truncate = iscsi_truncate,
#if defined(LIBISCSI_FEATURE_IOVECTOR)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 07/17] block: add BlockLimits structure to BlockDriverState
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (5 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 06/17] block/iscsi: add .bdrv_get_info Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 08/17] block/raw: copy BlockLimits on raw_open Peter Lieven
` (11 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
this patch adds BlockLimits which introduces discard and write_zeroes
limits and alignment information to the BlockDriverState.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
include/block/block_int.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9bbaa29..33be247 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -227,6 +227,20 @@ struct BlockDriver {
QLIST_ENTRY(BlockDriver) list;
};
+typedef struct BlockLimits {
+ /* maximum number of sectors that can be discarded at once */
+ int max_discard;
+
+ /* optimal alignment for discard requests in sectors */
+ int64_t discard_alignment;
+
+ /* maximum number of sectors that can zeroized at once */
+ int max_write_zeroes;
+
+ /* optimal alignment for write zeroes requests in sectors */
+ int64_t write_zeroes_alignment;
+} BlockLimits;
+
/*
* Note: the function bdrv_append() copies and swaps contents of
* BlockDriverStates, so if you add new fields to this struct, please
@@ -280,6 +294,9 @@ struct BlockDriverState {
uint64_t total_time_ns[BDRV_MAX_IOTYPE];
uint64_t wr_highest_sector;
+ /* I/O Limits */
+ BlockLimits bl;
+
/* Whether the disk can expand beyond total_sectors */
int growable;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 08/17] block/raw: copy BlockLimits on raw_open
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (6 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 07/17] block: add BlockLimits structure to BlockDriverState Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 09/17] block: honour BlockLimits in bdrv_co_do_write_zeroes Peter Lieven
` (10 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/raw_bsd.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index b0dd23f..49ac18c 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->sg;
+ bs->bl = bs->file->bl;
return 0;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 09/17] block: honour BlockLimits in bdrv_co_do_write_zeroes
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (7 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 08/17] block/raw: copy BlockLimits on raw_open Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard Peter Lieven
` (9 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/block.c b/block.c
index 0601b02..0c0b0ac 100644
--- a/block.c
+++ b/block.c
@@ -2703,32 +2703,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
BDRV_REQ_COPY_ON_READ);
}
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_WRITE_ZEROES_DEFAULT 32768
+
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
- struct iovec iov;
- int ret;
+ struct iovec iov = {0};
+ int ret = 0;
- /* TODO Emulate only part of misaligned requests instead of letting block
- * drivers return -ENOTSUP and emulate everything */
+ int max_write_zeroes = bs->bl.max_write_zeroes ?
+ bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
- /* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
- if (ret != -ENOTSUP) {
- return ret;
+ while (nb_sectors > 0 && !ret) {
+ int num = nb_sectors;
+
+ /* align request */
+ if (bs->bl.write_zeroes_alignment &&
+ num >= bs->bl.write_zeroes_alignment &&
+ sector_num % bs->bl.write_zeroes_alignment) {
+ if (num > bs->bl.write_zeroes_alignment) {
+ num = bs->bl.write_zeroes_alignment;
+ }
+ num -= sector_num % bs->bl.write_zeroes_alignment;
}
- }
- /* Fall back to bounce buffer if write zeroes is unsupported */
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
- memset(iov.iov_base, 0, iov.iov_len);
- qemu_iovec_init_external(&qiov, &iov, 1);
+ /* limit request size */
+ if (num > max_write_zeroes) {
+ num = max_write_zeroes;
+ }
+
+ ret = -ENOTSUP;
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+ }
+
+ if (ret == -ENOTSUP) {
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = num * BDRV_SECTOR_SIZE;
+ if (iov.iov_base == NULL) {
+ /* allocate bounce buffer only once and ensure that it
+ * is big enough for this and all future requests.
+ */
+ size_t bufsize = num <= nb_sectors ? num : max_write_zeroes;
+ iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE);
+ memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE);
+ }
+ qemu_iovec_init_external(&qiov, &iov, 1);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+ ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+ }
+
+ sector_num += num;
+ nb_sectors -= num;
+ }
qemu_vfree(iov.iov_base);
return ret;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (8 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 09/17] block: honour BlockLimits in bdrv_co_do_write_zeroes Peter Lieven
@ 2013-10-24 10:06 ` Peter Lieven
2013-11-11 13:20 ` Kevin Wolf
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 11/17] iscsi: set limits in BlockDriverState Peter Lieven
` (8 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:06 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 37 ++++++++++++++++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/block.c b/block.c
index 0c0b0ac..b28dd42 100644
--- a/block.c
+++ b/block.c
@@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_DISCARD_DEFAULT 32768
+
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
@@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
}
if (bs->drv->bdrv_co_discard) {
- return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
+ int max_discard = bs->bl.max_discard ?
+ bs->bl.max_discard : MAX_DISCARD_DEFAULT;
+
+ while (nb_sectors > 0) {
+ int ret;
+ int num = nb_sectors;
+
+ /* align request */
+ if (bs->bl.discard_alignment &&
+ num >= bs->bl.discard_alignment &&
+ sector_num % bs->bl.discard_alignment) {
+ if (num > bs->bl.discard_alignment) {
+ num = bs->bl.discard_alignment;
+ }
+ num -= sector_num % bs->bl.discard_alignment;
+ }
+
+ /* limit request size */
+ if (num > max_discard) {
+ num = max_discard;
+ }
+
+ ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+ if (ret) {
+ return ret;
+ }
+
+ sector_num += num;
+ nb_sectors -= num;
+ }
+ return 0;
} else if (bs->drv->bdrv_aio_discard) {
BlockDriverAIOCB *acb;
CoroutineIOCompletion co = {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard Peter Lieven
@ 2013-11-11 13:20 ` Kevin Wolf
2013-11-12 8:53 ` Peter Lieven
0 siblings, 1 reply; 33+ messages in thread
From: Kevin Wolf @ 2013-11-11 13:20 UTC (permalink / raw)
To: Peter Lieven; +Cc: pbonzini, ronniesahlberg, qemu-devel, stefanha
Am 24.10.2013 um 12:06 hat Peter Lieven geschrieben:
> Reviewed-by: Eric Blake <eblake@redhat.com>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block.c | 37 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/block.c b/block.c
> index 0c0b0ac..b28dd42 100644
> --- a/block.c
> +++ b/block.c
> @@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
> rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
> }
>
> +/* if no limit is specified in the BlockLimits use a default
> + * of 32768 512-byte sectors (16 MiB) per request.
> + */
> +#define MAX_DISCARD_DEFAULT 32768
> +
> int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
> int nb_sectors)
> {
> @@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
> }
>
> if (bs->drv->bdrv_co_discard) {
> - return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
> + int max_discard = bs->bl.max_discard ?
> + bs->bl.max_discard : MAX_DISCARD_DEFAULT;
> +
> + while (nb_sectors > 0) {
> + int ret;
> + int num = nb_sectors;
> +
> + /* align request */
> + if (bs->bl.discard_alignment &&
> + num >= bs->bl.discard_alignment &&
> + sector_num % bs->bl.discard_alignment) {
> + if (num > bs->bl.discard_alignment) {
> + num = bs->bl.discard_alignment;
> + }
> + num -= sector_num % bs->bl.discard_alignment;
> + }
> +
> + /* limit request size */
> + if (num > max_discard) {
> + num = max_discard;
> + }
> +
> + ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
> + if (ret) {
> + return ret;
> + }
> +
> + sector_num += num;
> + nb_sectors -= num;
> + }
> + return 0;
> } else if (bs->drv->bdrv_aio_discard) {
> BlockDriverAIOCB *acb;
> CoroutineIOCompletion co = {
You're only optimising drivers which have a .bdrv_co_discard()
implementation, but not those with .bdrv_aio_discard(). Not very nice,
and it would have been easy to avoid this by putting the loop around the
whole if statement instead of inside the 'then' branch.
Kevin
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-11-11 13:20 ` Kevin Wolf
@ 2013-11-12 8:53 ` Peter Lieven
2013-11-12 9:19 ` Paolo Bonzini
0 siblings, 1 reply; 33+ messages in thread
From: Peter Lieven @ 2013-11-12 8:53 UTC (permalink / raw)
To: Kevin Wolf; +Cc: pbonzini, ronniesahlberg, qemu-devel, stefanha
On 11.11.2013 14:20, Kevin Wolf wrote:
> Am 24.10.2013 um 12:06 hat Peter Lieven geschrieben:
>> Reviewed-by: Eric Blake <eblake@redhat.com>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block.c | 37 ++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/block.c b/block.c
>> index 0c0b0ac..b28dd42 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
>> rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
>> }
>>
>> +/* if no limit is specified in the BlockLimits use a default
>> + * of 32768 512-byte sectors (16 MiB) per request.
>> + */
>> +#define MAX_DISCARD_DEFAULT 32768
>> +
>> int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>> int nb_sectors)
>> {
>> @@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>> }
>>
>> if (bs->drv->bdrv_co_discard) {
>> - return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
>> + int max_discard = bs->bl.max_discard ?
>> + bs->bl.max_discard : MAX_DISCARD_DEFAULT;
>> +
>> + while (nb_sectors > 0) {
>> + int ret;
>> + int num = nb_sectors;
>> +
>> + /* align request */
>> + if (bs->bl.discard_alignment &&
>> + num >= bs->bl.discard_alignment &&
>> + sector_num % bs->bl.discard_alignment) {
>> + if (num > bs->bl.discard_alignment) {
>> + num = bs->bl.discard_alignment;
>> + }
>> + num -= sector_num % bs->bl.discard_alignment;
>> + }
>> +
>> + /* limit request size */
>> + if (num > max_discard) {
>> + num = max_discard;
>> + }
>> +
>> + ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + sector_num += num;
>> + nb_sectors -= num;
>> + }
>> + return 0;
>> } else if (bs->drv->bdrv_aio_discard) {
>> BlockDriverAIOCB *acb;
>> CoroutineIOCompletion co = {
> You're only optimising drivers which have a .bdrv_co_discard()
> implementation, but not those with .bdrv_aio_discard(). Not very nice,
> and it would have been easy to avoid this by putting the loop around the
> whole if statement instead of inside the 'then' branch.
Good point. I wonder noone noticed before ;-)
Do you want me to respin or is ok to send a follow up patch?
Stefan has it already in block-next. This patch doesn't make
the situation worse and we need follow up patches for
all the drivers to supply alignment information anyway.
Peter
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-11-12 8:53 ` Peter Lieven
@ 2013-11-12 9:19 ` Paolo Bonzini
2013-11-12 9:21 ` Peter Lieven
0 siblings, 1 reply; 33+ messages in thread
From: Paolo Bonzini @ 2013-11-12 9:19 UTC (permalink / raw)
To: Peter Lieven; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, stefanha
Il 12/11/2013 09:53, Peter Lieven ha scritto:
> Good point. I wonder noone noticed before ;-)
>
> Do you want me to respin or is ok to send a follow up patch?
> Stefan has it already in block-next. This patch doesn't make
> the situation worse and we need follow up patches for
> all the drivers to supply alignment information anyway.
I can do it too, since I'm working on follow-up patches anyway.
Paolo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-11-12 9:19 ` Paolo Bonzini
@ 2013-11-12 9:21 ` Peter Lieven
2013-11-12 9:45 ` Kevin Wolf
0 siblings, 1 reply; 33+ messages in thread
From: Peter Lieven @ 2013-11-12 9:21 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Kevin Wolf, ronniesahlberg, qemu-devel, stefanha
On 12.11.2013 10:19, Paolo Bonzini wrote:
> Il 12/11/2013 09:53, Peter Lieven ha scritto:
>> Good point. I wonder noone noticed before ;-)
>>
>> Do you want me to respin or is ok to send a follow up patch?
>> Stefan has it already in block-next. This patch doesn't make
>> the situation worse and we need follow up patches for
>> all the drivers to supply alignment information anyway.
> I can do it too, since I'm working on follow-up patches anyway.
That would be great if Kevin does not insist on a respin.
Thank you.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
2013-11-12 9:21 ` Peter Lieven
@ 2013-11-12 9:45 ` Kevin Wolf
0 siblings, 0 replies; 33+ messages in thread
From: Kevin Wolf @ 2013-11-12 9:45 UTC (permalink / raw)
To: Peter Lieven; +Cc: Paolo Bonzini, ronniesahlberg, qemu-devel, stefanha
Am 12.11.2013 um 10:21 hat Peter Lieven geschrieben:
> On 12.11.2013 10:19, Paolo Bonzini wrote:
> >Il 12/11/2013 09:53, Peter Lieven ha scritto:
> >>Good point. I wonder noone noticed before ;-)
> >>
> >>Do you want me to respin or is ok to send a follow up patch?
> >>Stefan has it already in block-next. This patch doesn't make
> >>the situation worse and we need follow up patches for
> >>all the drivers to supply alignment information anyway.
> >I can do it too, since I'm working on follow-up patches anyway.
> That would be great if Kevin does not insist on a respin.
A follow-up patch is fine.
Kevin
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 11/17] iscsi: set limits in BlockDriverState
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (9 preceding siblings ...)
2013-10-24 10:06 ` [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 12/17] iscsi: simplify iscsi_co_discard Peter Lieven
` (7 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 1dbbcad..16d8052 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1384,6 +1384,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
sizeof(struct scsi_inquiry_block_limits));
scsi_free_scsi_task(task);
task = NULL;
+
+ if (iscsilun->bl.max_unmap < 0xffffffff) {
+ bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
+ iscsilun);
+ }
+ bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+ iscsilun);
+
+ if (iscsilun->bl.max_ws_len < 0xffffffff) {
+ bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
+ iscsilun);
+ }
+ bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+ iscsilun);
}
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 12/17] iscsi: simplify iscsi_co_discard
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (10 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 11/17] iscsi: set limits in BlockDriverState Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 13/17] iscsi: add bdrv_co_write_zeroes Peter Lieven
` (6 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
now that bdrv_co_discard can handle limits we do not need
the request split logic here anymore.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 67 +++++++++++++++++++++------------------------------------
1 file changed, 25 insertions(+), 42 deletions(-)
diff --git a/block/iscsi.c b/block/iscsi.c
index 16d8052..c0465aa 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -87,7 +87,6 @@ typedef struct IscsiAIOCB {
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES 5
-#define ISCSI_MAX_UNMAP 131072
static void
iscsi_bh_cb(void *p)
@@ -912,8 +911,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
- uint32_t nb_blocks;
- uint32_t max_unmap;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
@@ -925,52 +922,38 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
}
list.lba = sector_qemu2lun(sector_num, iscsilun);
- nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+ list.num = sector_qemu2lun(nb_sectors, iscsilun);
- max_unmap = iscsilun->bl.max_unmap;
- if (max_unmap == 0xffffffff) {
- max_unmap = ISCSI_MAX_UNMAP;
- }
-
- while (nb_blocks > 0) {
- iscsi_co_init_iscsitask(iscsilun, &iTask);
- list.num = nb_blocks;
- if (list.num > max_unmap) {
- list.num = max_unmap;
- }
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
- iscsi_co_generic_cb, &iTask) == NULL) {
- return -EIO;
- }
-
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_coroutine_yield();
- }
+ if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
+ iscsi_co_generic_cb, &iTask) == NULL) {
+ return -EIO;
+ }
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
- }
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
- if (iTask.do_retry) {
- goto retry;
- }
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
- if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
- /* the target might fail with a check condition if it
- is not happy with the alignment of the UNMAP request
- we silently fail in this case */
- return 0;
- }
+ if (iTask.do_retry) {
+ goto retry;
+ }
- if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
- }
+ if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
+ /* the target might fail with a check condition if it
+ is not happy with the alignment of the UNMAP request
+ we silently fail in this case */
+ return 0;
+ }
- list.lba += list.num;
- nb_blocks -= list.num;
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ return -EIO;
}
return 0;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 13/17] iscsi: add bdrv_co_write_zeroes
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (11 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 12/17] iscsi: simplify iscsi_co_discard Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 14/17] block: introduce bdrv_make_zero Peter Lieven
` (5 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/iscsi.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index c0465aa..014475d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
uint8_t lbprz;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
+ unsigned char *zeroblock;
} IscsiLun;
typedef struct IscsiTask {
@@ -959,6 +960,65 @@ retry:
return 0;
}
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+
+static int
+coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct IscsiTask iTask;
+ uint64_t lba;
+ uint32_t nb_blocks;
+
+ if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ return -EINVAL;
+ }
+
+ if (!iscsilun->lbp.lbpws) {
+ /* WRITE SAME is not supported by the target */
+ return -ENOTSUP;
+ }
+
+ lba = sector_qemu2lun(sector_num, iscsilun);
+ nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+
+ if (iscsilun->zeroblock == NULL) {
+ iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
+ }
+
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
+retry:
+ if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iscsilun->zeroblock, iscsilun->block_size,
+ nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
+ 0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
+ return -EIO;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
+
+ if (iTask.do_retry) {
+ goto retry;
+ }
+
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ return -EIO;
+ }
+
+ return 0;
+}
+
+#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
+
static int parse_chap(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
@@ -1421,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs)
}
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
+ g_free(iscsilun->zeroblock);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -1539,6 +1600,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_get_block_status = iscsi_co_get_block_status,
#endif
.bdrv_co_discard = iscsi_co_discard,
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+ .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+#endif
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 14/17] block: introduce bdrv_make_zero
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (12 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 13/17] iscsi: add bdrv_co_write_zeroes Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 15/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks Peter Lieven
` (4 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
this patch adds a call to completely zero out a block device.
the operation is sped up by checking the block status and
only writing zeroes to the device if they currently do not
return zeroes. optionally the zero writing can be sped up
by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero
write by unmapping if the driver supports it.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 37 +++++++++++++++++++++++++++++++++++++
include/block/block.h | 1 +
2 files changed, 38 insertions(+)
diff --git a/block.c b/block.c
index b28dd42..21a992a 100644
--- a/block.c
+++ b/block.c
@@ -2391,6 +2391,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
BDRV_REQ_ZERO_WRITE | flags);
}
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+ int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+ int64_t ret, nb_sectors, sector_num = 0;
+ int n;
+
+ for (;;) {
+ nb_sectors = target_size - sector_num;
+ if (nb_sectors <= 0) {
+ return 0;
+ }
+ if (nb_sectors > INT_MAX) {
+ nb_sectors = INT_MAX;
+ }
+ ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+ if (ret & BDRV_BLOCK_ZERO) {
+ sector_num += n;
+ continue;
+ }
+ ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+ if (ret < 0) {
+ error_report("error writing zeroes at sector %" PRId64 ": %s",
+ sector_num, strerror(-ret));
+ return ret;
+ }
+ sector_num += n;
+ }
+}
+
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count1)
{
diff --git a/include/block/block.h b/include/block/block.h
index 803c5ca..4d9e67c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 15/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (13 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 14/17] block: introduce bdrv_make_zero Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 16/17] qemu-img: add support for fully allocated images Peter Lieven
` (3 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
this patch does 2 things:
a) only do additional call outs if BDRV_BLOCK_ZERO is not already set.
b) use the newly introduced bdrv_unallocated_blocks_are_zero()
to return the zero state of an unallocated block. the used callout
to bdrv_has_zero_init() is only valid right after bdrv_create.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index 21a992a..69a2d2b 100644
--- a/block.c
+++ b/block.c
@@ -3263,8 +3263,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
*pnum, pnum);
}
- if (!(ret & BDRV_BLOCK_DATA)) {
- if (bdrv_has_zero_init(bs)) {
+ if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+ if (bdrv_unallocated_blocks_are_zero(bs)) {
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 16/17] qemu-img: add support for fully allocated images
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (14 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 15/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 17/17] qemu-img: conditionally zero out target on convert Peter Lieven
` (2 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
Signed-off-by: Peter Lieven <pl@kamp.de>
---
qemu-img.c | 10 +++++++---
qemu-img.texi | 6 ++++++
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 926f0a0..7f08364 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -100,8 +100,12 @@ static void help(void)
" '-h' with or without a command shows this help and lists the supported formats\n"
" '-p' show progress of command (only certain commands)\n"
" '-q' use Quiet mode - do not print any output (except errors)\n"
- " '-S' indicates the consecutive number of bytes that must contain only zeros\n"
- " for qemu-img to create a sparse image during conversion\n"
+ " '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
+ " contain only zeros for qemu-img to create a sparse image during\n"
+ " conversion. If the number of bytes is 0, the source will not be scanned for\n"
+ " unallocated or zero sectors, and the destination image will always be\n"
+ " fully allocated\n"
+ " images will always be fully allocated\n"
" '--output' takes the format in which the output must be done (human or json)\n"
" '-n' skips the target volume creation (useful if the volume is created\n"
" prior to running qemu-img)\n"
@@ -1465,7 +1469,7 @@ static int img_convert(int argc, char **argv)
/* signal EOF to align */
bdrv_write_compressed(out_bs, 0, NULL, 0);
} else {
- int has_zero_init = bdrv_has_zero_init(out_bs);
+ int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
sector_num = 0; // total number of sectors converted so far
nb_sectors = total_sectors - sector_num;
diff --git a/qemu-img.texi b/qemu-img.texi
index 768054e..da36975 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -193,6 +193,12 @@ Image conversion is also useful to get smaller image when using a
growable format such as @code{qcow} or @code{cow}: the empty sectors
are detected and suppressed from the destination image.
+@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
+that must contain only zeros for qemu-img to create a sparse image during
+conversion. If @var{sparse_size} is 0, the source will not be scanned for
+unallocated or zero sectors, and the destination image will always be
+fully allocated.
+
You can use the @var{backing_file} option to force the output image to be
created as a copy on write image of the specified base image; the
@var{backing_file} should have the same content as the input's base image,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [Qemu-devel] [PATCHv7 17/17] qemu-img: conditionally zero out target on convert
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (15 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 16/17] qemu-img: add support for fully allocated images Peter Lieven
@ 2013-10-24 10:07 ` Peter Lieven
2013-10-24 14:20 ` [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Paolo Bonzini
2013-11-06 13:08 ` Stefan Hajnoczi
18 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-10-24 10:07 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, stefanha, Peter Lieven, ronniesahlberg, pbonzini
If the target has_zero_init = 0, but supports efficiently
writing zeroes by unmapping we call bdrv_make_zero to
avoid fully allocating the target. This currently works
only for iscsi. It can be extended to raw with
BLKDISCARDZEROES for example.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
qemu-img.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 7f08364..bec6da3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1355,7 +1355,7 @@ static int img_convert(int argc, char **argv)
}
}
- flags = BDRV_O_RDWR;
+ flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
@@ -1471,6 +1471,14 @@ static int img_convert(int argc, char **argv)
} else {
int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
+ if (!has_zero_init && bdrv_can_write_zeroes_with_unmap(out_bs)) {
+ ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
+ if (ret < 0) {
+ goto out;
+ }
+ has_zero_init = 1;
+ }
+
sector_num = 0; // total number of sectors converted so far
nb_sectors = total_sectors - sector_num;
if (nb_sectors != 0) {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (16 preceding siblings ...)
2013-10-24 10:07 ` [Qemu-devel] [PATCHv7 17/17] qemu-img: conditionally zero out target on convert Peter Lieven
@ 2013-10-24 14:20 ` Paolo Bonzini
2013-11-06 13:08 ` Stefan Hajnoczi
18 siblings, 0 replies; 33+ messages in thread
From: Paolo Bonzini @ 2013-10-24 14:20 UTC (permalink / raw)
To: Peter Lieven; +Cc: kwolf, ronniesahlberg, qemu-devel, stefanha
Il 24/10/2013 11:06, Peter Lieven ha scritto:
> this patch adds the ability for targets to stay sparse during
> block migration (if the zero_blocks capability is set) and qemu-img convert
> even if the target does not have has_zero_init = 1.
>
> the series was especially developed for iSCSI, but it should also work
> with other drivers with little or no adjustments. these adjustments
> should be limited to providing block provisioning information through
> get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes.
>
> v6->v7:
> - switched position of "iscsi: set limits in BlockDriverState" and
> "iscsi: simplify iscsi_co_discard". (Paolo)
> - fixed commit message of
> "block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks".
> (Paolo)
> - moved "block/raw: copy BlockLimits on raw_open" right after
> "block: add BlockLimits structure to BlockDriverState". (Paolo)
> - Reworded desciption for -S 0 in
> "qemu-img: add support for fully allocated images" as suggested
> by Paolo.
> - Reworded commit message of:
> "qemu-img: conditionally zero out target on convert".
> regarding iscsi (Paolo)
>
> v5->v6:
> - protected iscsi_co_write_zeroes by the existence of the
> SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly
> but necessary because the semantic of iscsi_writesame16_task
> silently changed between libiscsi 1.8.0 and 1.9.0. The above
> macro was the first added after the change. I already contacted
> Ronnie to introduce an API version macro which has to be bumped
> on each new function that will be added. Changes to the parameters
> should not happen at all of course.
>
> v4->v5:
> - new patches 4-6 to move the block provisioning information
> to the BlockDriverInfo.
> - kept 2 wrappers to read the information from the BDI and
> renamed them to make more clear what they do:
>
> bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero
> bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap
>
> - added additional information about the 2 flags in the
> BDI struct in block.h
>
> v3->v4:
> - changed BlockLimits struct to typedef (Stefan, Eric)
> - renamed bdrv_zeroize to bdrv_make_zero (Stefan)
> - added comment about the -S flag of qemu-img convert in
> qemu-img.texi (Eric)
> - used struct assignment for bs->bl in raw_open (Stefan, Eric)
> - dropped 3 get_block_status fixes that are independent of
> this series and already partly merged.
>
> v2->v3:
> - fix merge conflict in block/qcow2_cluster.c
> - changed return type of bdrv_has_discard_zeroes and
> bdrv_has_discard_write_zeroes to bool.
> - moved alignment and limits info to a BlockLimits struct (Paolo).
> - added magic constanst for default maximum in bdrv_co_do_write_zeroes
> and bdrv_co_discard (Eric).
> - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric),
> fixed bounce iov_len in the fall back path.
> - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed
> to bdrv_write_zeroes (Eric).
> - qemu-img: changed the default hint for -S (min_sparse) in the usage
> help to 4k. not changing the default as it is unclear why this default
> was set. size suffixes are already supported (Eric).
>
> v1->v2:
> - moved block max_discard and max_write_zeroes to BlockDriverState
> - added discard_alignment and write_zeroes_alignment to BlockDriverState
> - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes()
> - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour
> limit and alignment info.
> - added support for -S 0 in qemu-img convert.
>
> Peter Lieven (17):
> block: make BdrvRequestFlags public
> block: add flags to bdrv_*_write_zeroes
> block: introduce BDRV_REQ_MAY_UNMAP request flag
> block: add logical block provisioning info to BlockDriverInfo
> block: add wrappers for logical block provisioning information
> block/iscsi: add .bdrv_get_info
> block: add BlockLimits structure to BlockDriverState
> block/raw: copy BlockLimits on raw_open
> block: honour BlockLimits in bdrv_co_do_write_zeroes
> block: honour BlockLimits in bdrv_co_discard
> iscsi: set limits in BlockDriverState
> iscsi: simplify iscsi_co_discard
> iscsi: add bdrv_co_write_zeroes
> block: introduce bdrv_make_zero
> block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
> qemu-img: add support for fully allocated images
> qemu-img: conditionally zero out target on convert
>
> block-migration.c | 3 +-
> block.c | 200 +++++++++++++++++++++++++++++++++++++--------
> block/backup.c | 3 +-
> block/iscsi.c | 150 +++++++++++++++++++++++++---------
> block/qcow2-cluster.c | 2 +-
> block/qcow2.c | 2 +-
> block/qed.c | 3 +-
> block/raw_bsd.c | 6 +-
> block/vmdk.c | 3 +-
> include/block/block.h | 35 +++++++-
> include/block/block_int.h | 19 ++++-
> qemu-img.c | 20 ++++-
> qemu-img.texi | 6 ++
> qemu-io-cmds.c | 2 +-
> 14 files changed, 366 insertions(+), 88 deletions(-)
>
This looks good to me.
Paolo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-10-24 10:06 [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Peter Lieven
` (17 preceding siblings ...)
2013-10-24 14:20 ` [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements Paolo Bonzini
@ 2013-11-06 13:08 ` Stefan Hajnoczi
2013-11-06 13:09 ` Paolo Bonzini
2013-11-06 19:36 ` Peter Lieven
18 siblings, 2 replies; 33+ messages in thread
From: Stefan Hajnoczi @ 2013-11-06 13:08 UTC (permalink / raw)
To: Peter Lieven; +Cc: kwolf, pbonzini, ronniesahlberg, qemu-devel, stefanha
On Thu, Oct 24, 2013 at 12:06:49PM +0200, Peter Lieven wrote:
> this patch adds the ability for targets to stay sparse during
> block migration (if the zero_blocks capability is set) and qemu-img convert
> even if the target does not have has_zero_init = 1.
>
> the series was especially developed for iSCSI, but it should also work
> with other drivers with little or no adjustments. these adjustments
> should be limited to providing block provisioning information through
> get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes.
>
> v6->v7:
> - switched position of "iscsi: set limits in BlockDriverState" and
> "iscsi: simplify iscsi_co_discard". (Paolo)
> - fixed commit message of
> "block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks".
> (Paolo)
> - moved "block/raw: copy BlockLimits on raw_open" right after
> "block: add BlockLimits structure to BlockDriverState". (Paolo)
> - Reworded desciption for -S 0 in
> "qemu-img: add support for fully allocated images" as suggested
> by Paolo.
> - Reworded commit message of:
> "qemu-img: conditionally zero out target on convert".
> regarding iscsi (Paolo)
>
> v5->v6:
> - protected iscsi_co_write_zeroes by the existence of the
> SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly
> but necessary because the semantic of iscsi_writesame16_task
> silently changed between libiscsi 1.8.0 and 1.9.0. The above
> macro was the first added after the change. I already contacted
> Ronnie to introduce an API version macro which has to be bumped
> on each new function that will be added. Changes to the parameters
> should not happen at all of course.
>
> v4->v5:
> - new patches 4-6 to move the block provisioning information
> to the BlockDriverInfo.
> - kept 2 wrappers to read the information from the BDI and
> renamed them to make more clear what they do:
>
> bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero
> bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap
>
> - added additional information about the 2 flags in the
> BDI struct in block.h
>
> v3->v4:
> - changed BlockLimits struct to typedef (Stefan, Eric)
> - renamed bdrv_zeroize to bdrv_make_zero (Stefan)
> - added comment about the -S flag of qemu-img convert in
> qemu-img.texi (Eric)
> - used struct assignment for bs->bl in raw_open (Stefan, Eric)
> - dropped 3 get_block_status fixes that are independent of
> this series and already partly merged.
>
> v2->v3:
> - fix merge conflict in block/qcow2_cluster.c
> - changed return type of bdrv_has_discard_zeroes and
> bdrv_has_discard_write_zeroes to bool.
> - moved alignment and limits info to a BlockLimits struct (Paolo).
> - added magic constanst for default maximum in bdrv_co_do_write_zeroes
> and bdrv_co_discard (Eric).
> - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric),
> fixed bounce iov_len in the fall back path.
> - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed
> to bdrv_write_zeroes (Eric).
> - qemu-img: changed the default hint for -S (min_sparse) in the usage
> help to 4k. not changing the default as it is unclear why this default
> was set. size suffixes are already supported (Eric).
>
> v1->v2:
> - moved block max_discard and max_write_zeroes to BlockDriverState
> - added discard_alignment and write_zeroes_alignment to BlockDriverState
> - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes()
> - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour
> limit and alignment info.
> - added support for -S 0 in qemu-img convert.
>
> Peter Lieven (17):
> block: make BdrvRequestFlags public
> block: add flags to bdrv_*_write_zeroes
> block: introduce BDRV_REQ_MAY_UNMAP request flag
> block: add logical block provisioning info to BlockDriverInfo
> block: add wrappers for logical block provisioning information
> block/iscsi: add .bdrv_get_info
> block: add BlockLimits structure to BlockDriverState
> block/raw: copy BlockLimits on raw_open
> block: honour BlockLimits in bdrv_co_do_write_zeroes
> block: honour BlockLimits in bdrv_co_discard
> iscsi: set limits in BlockDriverState
> iscsi: simplify iscsi_co_discard
> iscsi: add bdrv_co_write_zeroes
> block: introduce bdrv_make_zero
> block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
> qemu-img: add support for fully allocated images
> qemu-img: conditionally zero out target on convert
>
> block-migration.c | 3 +-
> block.c | 200 +++++++++++++++++++++++++++++++++++++--------
> block/backup.c | 3 +-
> block/iscsi.c | 150 +++++++++++++++++++++++++---------
> block/qcow2-cluster.c | 2 +-
> block/qcow2.c | 2 +-
> block/qed.c | 3 +-
> block/raw_bsd.c | 6 +-
> block/vmdk.c | 3 +-
> include/block/block.h | 35 +++++++-
> include/block/block_int.h | 19 ++++-
> qemu-img.c | 20 ++++-
> qemu-img.texi | 6 ++
> qemu-io-cmds.c | 2 +-
> 14 files changed, 366 insertions(+), 88 deletions(-)
Thanks, applied to my block-next tree:
https://github.com/stefanha/qemu/commits/block-next
This will go into QEMU 1.8. Since it's a new feature that touches core
block layer code and several block drivers, I can't merge it into
1.7-rc.
Stefan
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-06 13:08 ` Stefan Hajnoczi
@ 2013-11-06 13:09 ` Paolo Bonzini
2013-11-06 19:38 ` Peter Lieven
2013-11-06 19:36 ` Peter Lieven
1 sibling, 1 reply; 33+ messages in thread
From: Paolo Bonzini @ 2013-11-06 13:09 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: kwolf, ronniesahlberg, Peter Lieven, qemu-devel, stefanha
Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
> Thanks, applied to my block-next tree:
> https://github.com/stefanha/qemu/commits/block-next
>
> This will go into QEMU 1.8. Since it's a new feature that touches core
> block layer code and several block drivers, I can't merge it into
> 1.7-rc.
Yay! With these patches I can properly emulate WRITE SAME in the SCSI
layer.
Paolo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-06 13:09 ` Paolo Bonzini
@ 2013-11-06 19:38 ` Peter Lieven
2013-11-06 21:16 ` Paolo Bonzini
0 siblings, 1 reply; 33+ messages in thread
From: Peter Lieven @ 2013-11-06 19:38 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kwolf, Stefan Hajnoczi, ronniesahlberg, qemu-devel, stefanha
Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>> Thanks, applied to my block-next tree:
>> https://github.com/stefanha/qemu/commits/block-next
>>
>> This will go into QEMU 1.8. Since it's a new feature that touches core
>> block layer code and several block drivers, I can't merge it into
>> 1.7-rc.
>
> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
> layer.
Yes, you can, but only when write same is used to write zeroes. The other
case is still unhandled or needs emulation.
Peter
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-06 19:38 ` Peter Lieven
@ 2013-11-06 21:16 ` Paolo Bonzini
2013-11-08 17:52 ` ronnie sahlberg
0 siblings, 1 reply; 33+ messages in thread
From: Paolo Bonzini @ 2013-11-06 21:16 UTC (permalink / raw)
To: Peter Lieven; +Cc: kwolf, Stefan Hajnoczi, stefanha, qemu-devel, ronniesahlberg
Il 06/11/2013 20:38, Peter Lieven ha scritto:
>
> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
>
>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>>> Thanks, applied to my block-next tree:
>>> https://github.com/stefanha/qemu/commits/block-next
>>>
>>> This will go into QEMU 1.8. Since it's a new feature that touches core
>>> block layer code and several block drivers, I can't merge it into
>>> 1.7-rc.
>>
>> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
>> layer.
>
> Yes, you can, but only when write same is used to write zeroes. The other
> case is still unhandled or needs emulation.
WRITE SAME with UNMAP is incorrect in the current QEMU code. Your patch
let me fix it so that the payload can be read back with no change, *and*
sectors are unmapped when possible.
Paolo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-06 21:16 ` Paolo Bonzini
@ 2013-11-08 17:52 ` ronnie sahlberg
2013-11-08 18:03 ` ronnie sahlberg
2013-11-08 19:28 ` Peter Lieven
0 siblings, 2 replies; 33+ messages in thread
From: ronnie sahlberg @ 2013-11-08 17:52 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Kevin Wolf, Stefan Hajnoczi, Peter Lieven, qemu-devel,
Stefan Hajnoczi
For better support for older versions of libiscsi
I think it would be good to convert all the
iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task functions with
calls to the much more genric iscsi_scsi_command_sync().
iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
available since prior to version 1.1 and can be used to send arbitrary
scsi opcodes to the target.
iscsi_scsi_command_async() is already used instead of
iscs_read16/write16_async() since the read16/write16 helpers were
added to libiscsi at a much later stage and there are examples of its
use.
Using iscsi_scsi_command_[a]sync() instead of for example
iscsi_unmap_task() would mean that you can use the UNMAP opcode
always, regardless version of libiscsi.
It would mean that you need to build the CDB directly inside
block/iscsi.c but that is not hard.
I.e. change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
for all opcodes it wants to send to the target.
On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>
>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
>>
>>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>>>> Thanks, applied to my block-next tree:
>>>> https://github.com/stefanha/qemu/commits/block-next
>>>>
>>>> This will go into QEMU 1.8. Since it's a new feature that touches core
>>>> block layer code and several block drivers, I can't merge it into
>>>> 1.7-rc.
>>>
>>> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
>>> layer.
>>
>> Yes, you can, but only when write same is used to write zeroes. The other
>> case is still unhandled or needs emulation.
>
> WRITE SAME with UNMAP is incorrect in the current QEMU code. Your patch
> let me fix it so that the payload can be read back with no change, *and*
> sectors are unmapped when possible.
>
> Paolo
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-08 17:52 ` ronnie sahlberg
@ 2013-11-08 18:03 ` ronnie sahlberg
2013-11-08 19:29 ` Peter Lieven
2013-11-08 19:28 ` Peter Lieven
1 sibling, 1 reply; 33+ messages in thread
From: ronnie sahlberg @ 2013-11-08 18:03 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Kevin Wolf, Stefan Hajnoczi, Peter Lieven, qemu-devel,
Stefan Hajnoczi
It would mean that any version of libiscsi from 1.1 or later would
work and there would not be the issues such as
UNMAP is only available from 1.2 and forward, WRITESAME* is only
availabel from 1.3.
SANITIZE only being available from 1.9 ...
On Fri, Nov 8, 2013 at 9:52 AM, ronnie sahlberg
<ronniesahlberg@gmail.com> wrote:
> For better support for older versions of libiscsi
> I think it would be good to convert all the
> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task functions with
> calls to the much more genric iscsi_scsi_command_sync().
>
> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
> available since prior to version 1.1 and can be used to send arbitrary
> scsi opcodes to the target.
>
>
> iscsi_scsi_command_async() is already used instead of
> iscs_read16/write16_async() since the read16/write16 helpers were
> added to libiscsi at a much later stage and there are examples of its
> use.
>
>
> Using iscsi_scsi_command_[a]sync() instead of for example
> iscsi_unmap_task() would mean that you can use the UNMAP opcode
> always, regardless version of libiscsi.
> It would mean that you need to build the CDB directly inside
> block/iscsi.c but that is not hard.
>
>
> I.e. change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
> for all opcodes it wants to send to the target.
>
>
>
>
> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>>
>>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
>>>
>>>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>>>>> Thanks, applied to my block-next tree:
>>>>> https://github.com/stefanha/qemu/commits/block-next
>>>>>
>>>>> This will go into QEMU 1.8. Since it's a new feature that touches core
>>>>> block layer code and several block drivers, I can't merge it into
>>>>> 1.7-rc.
>>>>
>>>> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
>>>> layer.
>>>
>>> Yes, you can, but only when write same is used to write zeroes. The other
>>> case is still unhandled or needs emulation.
>>
>> WRITE SAME with UNMAP is incorrect in the current QEMU code. Your patch
>> let me fix it so that the payload can be read back with no change, *and*
>> sectors are unmapped when possible.
>>
>> Paolo
>>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-08 18:03 ` ronnie sahlberg
@ 2013-11-08 19:29 ` Peter Lieven
0 siblings, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-11-08 19:29 UTC (permalink / raw)
To: ronnie sahlberg
Cc: Kevin Wolf, Paolo Bonzini, qemu-devel, Stefan Hajnoczi,
Stefan Hajnoczi
Am 08.11.2013 um 19:03 schrieb ronnie sahlberg <ronniesahlberg@gmail.com>:
> It would mean that any version of libiscsi from 1.1 or later would
> work and there would not be the issues such as
> UNMAP is only available from 1.2 and forward, WRITESAME* is only
> availabel from 1.3.
I think 1.3.0 is not a big requirement when the recent version is 1.8.0
> SANITIZE only being available from 1.9 …
Sanitize we do not use at all currently.
Peter
>
>
> On Fri, Nov 8, 2013 at 9:52 AM, ronnie sahlberg
> <ronniesahlberg@gmail.com> wrote:
>> For better support for older versions of libiscsi
>> I think it would be good to convert all the
>> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task functions with
>> calls to the much more genric iscsi_scsi_command_sync().
>>
>> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
>> available since prior to version 1.1 and can be used to send arbitrary
>> scsi opcodes to the target.
>>
>>
>> iscsi_scsi_command_async() is already used instead of
>> iscs_read16/write16_async() since the read16/write16 helpers were
>> added to libiscsi at a much later stage and there are examples of its
>> use.
>>
>>
>> Using iscsi_scsi_command_[a]sync() instead of for example
>> iscsi_unmap_task() would mean that you can use the UNMAP opcode
>> always, regardless version of libiscsi.
>> It would mean that you need to build the CDB directly inside
>> block/iscsi.c but that is not hard.
>>
>>
>> I.e. change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
>> for all opcodes it wants to send to the target.
>>
>>
>>
>>
>> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>>>
>>>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
>>>>
>>>>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>>>>>> Thanks, applied to my block-next tree:
>>>>>> https://github.com/stefanha/qemu/commits/block-next
>>>>>>
>>>>>> This will go into QEMU 1.8. Since it's a new feature that touches core
>>>>>> block layer code and several block drivers, I can't merge it into
>>>>>> 1.7-rc.
>>>>>
>>>>> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
>>>>> layer.
>>>>
>>>> Yes, you can, but only when write same is used to write zeroes. The other
>>>> case is still unhandled or needs emulation.
>>>
>>> WRITE SAME with UNMAP is incorrect in the current QEMU code. Your patch
>>> let me fix it so that the payload can be read back with no change, *and*
>>> sectors are unmapped when possible.
>>>
>>> Paolo
>>>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-08 17:52 ` ronnie sahlberg
2013-11-08 18:03 ` ronnie sahlberg
@ 2013-11-08 19:28 ` Peter Lieven
1 sibling, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-11-08 19:28 UTC (permalink / raw)
To: ronnie sahlberg
Cc: Kevin Wolf, Paolo Bonzini, qemu-devel, Stefan Hajnoczi,
Stefan Hajnoczi
Am 08.11.2013 um 18:52 schrieb ronnie sahlberg <ronniesahlberg@gmail.com>:
> For better support for older versions of libiscsi
> I think it would be good to convert all the
> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task functions with
> calls to the much more genric iscsi_scsi_command_sync().
>
> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
> available since prior to version 1.1 and can be used to send arbitrary
> scsi opcodes to the target.
>
>
> iscsi_scsi_command_async() is already used instead of
> iscs_read16/write16_async() since the read16/write16 helpers were
> added to libiscsi at a much later stage and there are examples of its
> use.
>
>
> Using iscsi_scsi_command_[a]sync() instead of for example
> iscsi_unmap_task() would mean that you can use the UNMAP opcode
> always, regardless version of libiscsi.
> It would mean that you need to build the CDB directly inside
> block/iscsi.c but that is not hard.
>
>
> I.e. change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
> for all opcodes it wants to send to the target.
>
>
>
Hi Ronnie,
i would prefer to use the helper functions. It makes the code much, much
easier to read.
In general you are talking about the case that we have a totally
outdated libiscsi version, but a absolutely up to date qemu version.
If someone wants to compile a recent qemu on an old distro, this
is working, but simply not with all features enabled.
We keep basic support for read, write, flush. This works with any
libiscsi version. If someone wants to have support for write_zeroes,
discard or get_block_status he simply must use a recent libiscsi
version.
Peter
>
> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>>
>>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini <pbonzini@redhat.com>:
>>>
>>>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>>>>> Thanks, applied to my block-next tree:
>>>>> https://github.com/stefanha/qemu/commits/block-next
>>>>>
>>>>> This will go into QEMU 1.8. Since it's a new feature that touches core
>>>>> block layer code and several block drivers, I can't merge it into
>>>>> 1.7-rc.
>>>>
>>>> Yay! With these patches I can properly emulate WRITE SAME in the SCSI
>>>> layer.
>>>
>>> Yes, you can, but only when write same is used to write zeroes. The other
>>> case is still unhandled or needs emulation.
>>
>> WRITE SAME with UNMAP is incorrect in the current QEMU code. Your patch
>> let me fix it so that the payload can be read back with no change, *and*
>> sectors are unmapped when possible.
>>
>> Paolo
>>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
2013-11-06 13:08 ` Stefan Hajnoczi
2013-11-06 13:09 ` Paolo Bonzini
@ 2013-11-06 19:36 ` Peter Lieven
1 sibling, 0 replies; 33+ messages in thread
From: Peter Lieven @ 2013-11-06 19:36 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: kwolf, pbonzini, ronniesahlberg, qemu-devel, stefanha
Am 06.11.2013 um 14:08 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
> On Thu, Oct 24, 2013 at 12:06:49PM +0200, Peter Lieven wrote:
>> this patch adds the ability for targets to stay sparse during
>> block migration (if the zero_blocks capability is set) and qemu-img convert
>> even if the target does not have has_zero_init = 1.
>>
>> the series was especially developed for iSCSI, but it should also work
>> with other drivers with little or no adjustments. these adjustments
>> should be limited to providing block provisioning information through
>> get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes.
>>
>> v6->v7:
>> - switched position of "iscsi: set limits in BlockDriverState" and
>> "iscsi: simplify iscsi_co_discard". (Paolo)
>> - fixed commit message of
>> "block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks".
>> (Paolo)
>> - moved "block/raw: copy BlockLimits on raw_open" right after
>> "block: add BlockLimits structure to BlockDriverState". (Paolo)
>> - Reworded desciption for -S 0 in
>> "qemu-img: add support for fully allocated images" as suggested
>> by Paolo.
>> - Reworded commit message of:
>> "qemu-img: conditionally zero out target on convert".
>> regarding iscsi (Paolo)
>>
>> v5->v6:
>> - protected iscsi_co_write_zeroes by the existence of the
>> SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly
>> but necessary because the semantic of iscsi_writesame16_task
>> silently changed between libiscsi 1.8.0 and 1.9.0. The above
>> macro was the first added after the change. I already contacted
>> Ronnie to introduce an API version macro which has to be bumped
>> on each new function that will be added. Changes to the parameters
>> should not happen at all of course.
>>
>> v4->v5:
>> - new patches 4-6 to move the block provisioning information
>> to the BlockDriverInfo.
>> - kept 2 wrappers to read the information from the BDI and
>> renamed them to make more clear what they do:
>>
>> bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero
>> bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap
>>
>> - added additional information about the 2 flags in the
>> BDI struct in block.h
>>
>> v3->v4:
>> - changed BlockLimits struct to typedef (Stefan, Eric)
>> - renamed bdrv_zeroize to bdrv_make_zero (Stefan)
>> - added comment about the -S flag of qemu-img convert in
>> qemu-img.texi (Eric)
>> - used struct assignment for bs->bl in raw_open (Stefan, Eric)
>> - dropped 3 get_block_status fixes that are independent of
>> this series and already partly merged.
>>
>> v2->v3:
>> - fix merge conflict in block/qcow2_cluster.c
>> - changed return type of bdrv_has_discard_zeroes and
>> bdrv_has_discard_write_zeroes to bool.
>> - moved alignment and limits info to a BlockLimits struct (Paolo).
>> - added magic constanst for default maximum in bdrv_co_do_write_zeroes
>> and bdrv_co_discard (Eric).
>> - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric),
>> fixed bounce iov_len in the fall back path.
>> - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed
>> to bdrv_write_zeroes (Eric).
>> - qemu-img: changed the default hint for -S (min_sparse) in the usage
>> help to 4k. not changing the default as it is unclear why this default
>> was set. size suffixes are already supported (Eric).
>>
>> v1->v2:
>> - moved block max_discard and max_write_zeroes to BlockDriverState
>> - added discard_alignment and write_zeroes_alignment to BlockDriverState
>> - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes()
>> - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour
>> limit and alignment info.
>> - added support for -S 0 in qemu-img convert.
>>
>> Peter Lieven (17):
>> block: make BdrvRequestFlags public
>> block: add flags to bdrv_*_write_zeroes
>> block: introduce BDRV_REQ_MAY_UNMAP request flag
>> block: add logical block provisioning info to BlockDriverInfo
>> block: add wrappers for logical block provisioning information
>> block/iscsi: add .bdrv_get_info
>> block: add BlockLimits structure to BlockDriverState
>> block/raw: copy BlockLimits on raw_open
>> block: honour BlockLimits in bdrv_co_do_write_zeroes
>> block: honour BlockLimits in bdrv_co_discard
>> iscsi: set limits in BlockDriverState
>> iscsi: simplify iscsi_co_discard
>> iscsi: add bdrv_co_write_zeroes
>> block: introduce bdrv_make_zero
>> block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
>> qemu-img: add support for fully allocated images
>> qemu-img: conditionally zero out target on convert
>>
>> block-migration.c | 3 +-
>> block.c | 200 +++++++++++++++++++++++++++++++++++++--------
>> block/backup.c | 3 +-
>> block/iscsi.c | 150 +++++++++++++++++++++++++---------
>> block/qcow2-cluster.c | 2 +-
>> block/qcow2.c | 2 +-
>> block/qed.c | 3 +-
>> block/raw_bsd.c | 6 +-
>> block/vmdk.c | 3 +-
>> include/block/block.h | 35 +++++++-
>> include/block/block_int.h | 19 ++++-
>> qemu-img.c | 20 ++++-
>> qemu-img.texi | 6 ++
>> qemu-io-cmds.c | 2 +-
>> 14 files changed, 366 insertions(+), 88 deletions(-)
>
> Thanks, applied to my block-next tree:
> https://github.com/stefanha/qemu/commits/block-next
>
> This will go into QEMU 1.8. Since it's a new feature that touches core
> block layer code and several block drivers, I can't merge it into
> 1.7-rc.
Thats clear. Thank you for applying. As soon as this is the the official
git I will reach out for the block driver maintainers and ask them to
supply information for discard and write zero alignment + maxima.
They might also want to extend their BlockDriverInfo to reflect
their drivers abilities regarding unallocated_blocks_are_zero
and can_write_zeroes_with_unmap. Hopefully we get this
completed until 1.8 comes out.
Thanks
Peter
^ permalink raw reply [flat|nested] 33+ messages in thread