* [Qemu-devel] [PATCH 01/11] gluster: Use pkg-config to configure GlusterFS block driver
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 02/11] gluster: Add discard support for " Stefan Hajnoczi
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Stefan Hajnoczi, Bharata B Rao
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
Use pkg-config to determine the version and library dependencies of
the GlusterFS block driver.
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
configure | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/configure b/configure
index 9e1cd19..cf13493 100755
--- a/configure
+++ b/configure
@@ -2570,23 +2570,18 @@ fi
##########################################
# glusterfs probe
if test "$glusterfs" != "no" ; then
- cat > $TMPC <<EOF
-#include <glusterfs/api/glfs.h>
-int main(void) {
- (void) glfs_new("volume");
- return 0;
-}
-EOF
- glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
- if compile_prog "" "$glusterfs_libs" ; then
- glusterfs=yes
+ if $pkg_config --atleast-version=3 glusterfs-api >/dev/null 2>&1; then
+ glusterfs="yes"
+ glusterfs_cflags=`$pkg_config --cflags glusterfs-api 2>/dev/null`
+ glusterfs_libs=`$pkg_config --libs glusterfs-api 2>/dev/null`
+ CFLAGS="$CFLAGS $glusterfs_cflags"
libs_tools="$glusterfs_libs $libs_tools"
libs_softmmu="$glusterfs_libs $libs_softmmu"
else
if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support"
fi
- glusterfs=no
+ glusterfs="no"
fi
fi
--
1.8.1.4
* [Qemu-devel] [PATCH 02/11] gluster: Add discard support for GlusterFS block driver.
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 01/11] gluster: Use pkg-config to configure GlusterFS block driver Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 03/11] dataplane: sync virtio.c and vring.c virtqueue state Stefan Hajnoczi
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Stefan Hajnoczi, Bharata B Rao
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
Implement bdrv_aio_discard for gluster.
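For illustration, a minimal caller-side sketch of how the generic block layer
reaches this new callback. bdrv_aio_discard() and BlockDriverCompletionFunc are
the existing block-layer API of this series; the callback body and the sector
range are hypothetical.

#include <stdio.h>
#include "block/block.h"        /* bdrv_aio_discard(), BlockDriverAIOCB */

/* Completion callback: ret < 0 carries the error reported by the driver */
static void discard_done(void *opaque, int ret)
{
    fprintf(stderr, "discard finished: %d\n", ret);
}

/* Discard the first 1 MB (2048 sectors of 512 bytes).  For a gluster
 * BlockDriverState this is routed to qemu_gluster_aio_discard() below. */
static void submit_discard(BlockDriverState *bs)
{
    BlockDriverAIOCB *acb = bdrv_aio_discard(bs, 0, 2048, discard_done, NULL);

    if (acb == NULL) {
        fprintf(stderr, "discard could not be submitted\n");
    }
}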
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/gluster.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
configure | 8 ++++++++
2 files changed, 53 insertions(+)
diff --git a/block/gluster.c b/block/gluster.c
index 61424bc..6de418c 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -532,6 +532,39 @@ out:
return NULL;
}
+#ifdef CONFIG_GLUSTERFS_DISCARD
+static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ BDRVGlusterState *s = bs->opaque;
+ size_t size;
+ off_t offset;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+ acb->size = 0;
+ acb->ret = 0;
+ acb->finished = NULL;
+ s->qemu_aio_count++;
+
+ ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+#endif
+
static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
@@ -602,6 +635,9 @@ static BlockDriver bdrv_gluster = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -618,6 +654,9 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -634,6 +673,9 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -650,6 +692,9 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
diff --git a/configure b/configure
index cf13493..7c45db2 100755
--- a/configure
+++ b/configure
@@ -237,6 +237,7 @@ libiscsi=""
coroutine=""
seccomp=""
glusterfs=""
+glusterfs_discard="no"
virtio_blk_data_plane=""
gtk=""
gtkabi="2.0"
@@ -2577,6 +2578,9 @@ if test "$glusterfs" != "no" ; then
CFLAGS="$CFLAGS $glusterfs_cflags"
libs_tools="$glusterfs_libs $libs_tools"
libs_softmmu="$glusterfs_libs $libs_softmmu"
+ if $pkg_config --atleast-version=5 glusterfs-api >/dev/null 2>&1; then
+ glusterfs_discard="yes"
+ fi
else
if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support"
@@ -3964,6 +3968,10 @@ if test "$glusterfs" = "yes" ; then
echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
fi
+if test "$glusterfs_discard" = "yes" ; then
+ echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
+fi
+
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=y" >> $config_host_mak
fi
--
1.8.1.4
* [Qemu-devel] [PATCH 03/11] dataplane: sync virtio.c and vring.c virtqueue state
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 01/11] gluster: Use pkg-config to configure GlusterFS block driver Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 02/11] gluster: Add discard support for " Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 04/11] QEMUBH: make AioContext's bh re-entrant Stefan Hajnoczi
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Stefan Hajnoczi
Load the virtio.c state into vring.c when we start dataplane mode and
vice versa when stopping dataplane mode. This patch makes it possible
to start and stop dataplane any time while the guest is running.
This will eventually allow us to go back to the QEMU main loop for
bdrv_drain_all() and live migration. In the meantime, this patch makes
the dataplane lifecycle more robust but should make no visible
difference. It may be useful in the virtio-net dataplane effort.
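As a rough sketch of the lifecycle this enables (vring_setup() and
vring_teardown() are the functions changed below; the surrounding control flow
is hypothetical):

#include "hw/virtio/dataplane/vring.h"   /* vring_setup(), vring_teardown() */

/* Hypothetical start/stop cycle: vring_setup() now loads last_avail_idx
 * from the virtio.c side and vring_teardown() stores it back, so the
 * cycle can repeat at any point while the guest is running. */
static void dataplane_cycle(Vring *vring, VirtIODevice *vdev, int n)
{
    if (!vring_setup(vring, vdev, n)) {
        return;                         /* mapping the vring failed */
    }

    /* ... service the virtqueue in dataplane mode ... */

    vring_teardown(vring, vdev, n);     /* virtio.c sees the updated index */
}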
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/block/dataplane/virtio-blk.c | 2 +-
hw/virtio/dataplane/vring.c | 8 +++++---
include/hw/virtio/dataplane/vring.h | 2 +-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 0356665..2faed43 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -537,7 +537,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
/* Clean up guest notifier (irq) */
k->set_guest_notifiers(qbus->parent, 1, false);
- vring_teardown(&s->vring);
+ vring_teardown(&s->vring, s->vdev, 0);
s->started = false;
s->stopping = false;
}
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index e0d6e83..82cc151 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -39,8 +39,8 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
- vring->last_avail_idx = 0;
- vring->last_used_idx = 0;
+ vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n);
+ vring->last_used_idx = vring->vr.used->idx;
vring->signalled_used = 0;
vring->signalled_used_valid = false;
@@ -49,8 +49,10 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
return true;
}
-void vring_teardown(Vring *vring)
+void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
{
+ virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx);
+
hostmem_finalize(&vring->hostmem);
}
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index 9380cb5..c0b69ff 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -50,7 +50,7 @@ static inline void vring_set_broken(Vring *vring)
}
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n);
-void vring_teardown(Vring *vring);
+void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
--
1.8.1.4
* [Qemu-devel] [PATCH 04/11] QEMUBH: make AioContext's bh re-entrant
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (2 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 03/11] dataplane: sync virtio.c and vring.c virtqueue state Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 05/11] block: fix vvfat error path for enable_write_target Stefan Hajnoczi
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Liu Ping Fan, Liu Ping Fan, Stefan Hajnoczi
From: Liu Ping Fan <qemulist@gmail.com>
BHs will be used outside the big lock, so introduce a lock to protect
the writers, i.e. the BH adders and deleters, from each other. The lock
only affects the writers; a BH's callback does not take this extra lock.
Note that for the same AioContext, aio_bh_poll() cannot run in parallel
yet.
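A hypothetical usage sketch of what the lock permits (aio_bh_new(),
qemu_bh_schedule() and qemu_bh_delete() are the APIs touched here; the Work
struct and helper functions are illustrative):

#include <glib.h>
#include "block/aio.h"          /* AioContext, QEMUBH, aio_bh_new() */

typedef struct {
    QEMUBH *bh;
    int payload;
} Work;

/* Runs lock-free inside aio_bh_poll() in the AioContext thread */
static void one_shot_cb(void *opaque)
{
    Work *w = opaque;
    /* ... consume w->payload ... */
    qemu_bh_delete(w->bh);      /* actual unlinking happens in aio_bh_poll() */
    g_free(w);
}

/* Called from a worker thread: aio_bh_new() takes ctx->bh_lock, so the
 * list insertion cannot race with the cleanup pass in aio_bh_poll(), and
 * qemu_bh_schedule() issues smp_wmb() before setting bh->scheduled. */
static void post_work(AioContext *ctx, int payload)
{
    Work *w = g_new(Work, 1);

    w->payload = payload;
    w->bh = aio_bh_new(ctx, one_shot_cb, w);
    qemu_bh_schedule(w->bh);
}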
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
async.c | 33 +++++++++++++++++++++++++++++++--
include/block/aio.h | 7 +++++++
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/async.c b/async.c
index 90fe906..5ce3633 100644
--- a/async.c
+++ b/async.c
@@ -47,11 +47,16 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
bh->ctx = ctx;
bh->cb = cb;
bh->opaque = opaque;
+ qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh;
+ /* Make sure that the members are ready before putting bh into list */
+ smp_wmb();
ctx->first_bh = bh;
+ qemu_mutex_unlock(&ctx->bh_lock);
return bh;
}
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
QEMUBH *bh, **bhp, *next;
@@ -61,9 +66,15 @@ int aio_bh_poll(AioContext *ctx)
ret = 0;
for (bh = ctx->first_bh; bh; bh = next) {
+ /* Make sure that fetching bh happens before accessing its members */
+ smp_read_barrier_depends();
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
+ /* Paired with write barrier in bh schedule to ensure reading for
+ * idle & callbacks coming after bh's scheduling.
+ */
+ smp_rmb();
if (!bh->idle)
ret = 1;
bh->idle = 0;
@@ -75,6 +86,7 @@ int aio_bh_poll(AioContext *ctx)
/* remove deleted bhs */
if (!ctx->walking_bh) {
+ qemu_mutex_lock(&ctx->bh_lock);
bhp = &ctx->first_bh;
while (*bhp) {
bh = *bhp;
@@ -85,6 +97,7 @@ int aio_bh_poll(AioContext *ctx)
bhp = &bh->next;
}
}
+ qemu_mutex_unlock(&ctx->bh_lock);
}
return ret;
@@ -94,24 +107,38 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
{
if (bh->scheduled)
return;
- bh->scheduled = 1;
bh->idle = 1;
+ /* Make sure that idle & any writes needed by the callback are done
+ * before the locations are read in the aio_bh_poll.
+ */
+ smp_wmb();
+ bh->scheduled = 1;
}
void qemu_bh_schedule(QEMUBH *bh)
{
if (bh->scheduled)
return;
- bh->scheduled = 1;
bh->idle = 0;
+ /* Make sure that idle & any writes needed by the callback are done
+ * before the locations are read in the aio_bh_poll.
+ */
+ smp_wmb();
+ bh->scheduled = 1;
aio_notify(bh->ctx);
}
+
+/* This func is async.
+ */
void qemu_bh_cancel(QEMUBH *bh)
{
bh->scheduled = 0;
}
+/* This func is async. The bottom half will do the delete action at the final
+ * end.
+ */
void qemu_bh_delete(QEMUBH *bh)
{
bh->scheduled = 0;
@@ -176,6 +203,7 @@ aio_ctx_finalize(GSource *source)
thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
+ qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE);
}
@@ -211,6 +239,7 @@ AioContext *aio_context_new(void)
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
+ qemu_mutex_init(&ctx->bh_lock);
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
diff --git a/include/block/aio.h b/include/block/aio.h
index 1836793..cc77771 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
+#include "qemu/thread.h"
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
@@ -53,6 +54,8 @@ typedef struct AioContext {
*/
int walking_handlers;
+ /* lock to protect between bh's adders and deleter */
+ QemuMutex bh_lock;
/* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh;
@@ -127,6 +130,8 @@ void aio_notify(AioContext *ctx);
* aio_bh_poll: Poll bottom halves for an AioContext.
*
* These are internal functions used by the QEMU main loop.
+ * And notice that multiple occurrences of aio_bh_poll cannot
+ * be called concurrently
*/
int aio_bh_poll(AioContext *ctx);
@@ -163,6 +168,8 @@ void qemu_bh_cancel(QEMUBH *bh);
* Deleting a bottom half frees the memory that was allocated for it by
* qemu_bh_new. It also implies canceling the bottom half if it was
* scheduled.
+ * This func is async. The bottom half will do the delete action at the final
+ * end.
*
* @bh: The bottom half to be deleted.
*/
--
1.8.1.4
* [Qemu-devel] [PATCH 05/11] block: fix vvfat error path for enable_write_target
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (3 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 04/11] QEMUBH: make AioContext's bh re-entrant Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 06/11] block: add bdrv_write_zeroes() Stefan Hajnoczi
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Fam Zheng, Stefan Hajnoczi
From: Fam Zheng <famz@redhat.com>
s->qcow and s->qcow_filename are allocated but not freed on error. Fix the
possible leaks, remove the unnecessary check for bdrv_new(), and propagate
the return codes of bdrv_create() and enable_write_target().
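The shape of the fix, reduced to a hypothetical skeleton (get_tmp_filename()
is the existing QEMU helper; the function name and the elided middle are
illustrative):

#include <glib.h>

/* Skeleton of the pattern applied below: allocate, jump to one error label
 * on any failure, free exactly once, and return the negative errno instead
 * of a bare -1. */
static int setup_overlay(char **filename_out)
{
    char *filename = g_malloc(1024);
    int ret;

    ret = get_tmp_filename(filename, 1024);   /* existing QEMU helper */
    if (ret < 0) {
        goto err;
    }

    /* ... create and open the overlay image, goto err on failure ... */

    *filename_out = filename;
    return 0;

err:
    g_free(filename);
    *filename_out = NULL;
    return ret;
}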
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/vvfat.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/block/vvfat.c b/block/vvfat.c
index 87b0279..cd3b8ed 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1164,8 +1164,8 @@ DLOG(if (stderr == NULL) {
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
- if (enable_write_target(s)) {
- ret = -EIO;
+ ret = enable_write_target(s);
+ if (ret < 0) {
goto fail;
}
bs->read_only = 0;
@@ -2917,9 +2917,7 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) {
- g_free(s->qcow_filename);
- s->qcow_filename = NULL;
- return ret;
+ goto err;
}
bdrv_qcow = bdrv_find_format("qcow");
@@ -2927,18 +2925,18 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
- if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
- return -1;
+ ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
+ if (ret < 0) {
+ goto err;
+ }
s->qcow = bdrv_new("");
- if (s->qcow == NULL) {
- return -1;
- }
ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
- return ret;
+ bdrv_delete(s->qcow);
+ goto err;
}
#ifndef _WIN32
@@ -2951,6 +2949,11 @@ static int enable_write_target(BDRVVVFATState *s)
*(void**)s->bs->backing_hd->opaque = s;
return 0;
+
+err:
+ g_free(s->qcow_filename);
+ s->qcow_filename = NULL;
+ return ret;
}
static void vvfat_close(BlockDriverState *bs)
--
1.8.1.4
* [Qemu-devel] [PATCH 06/11] block: add bdrv_write_zeroes()
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (4 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 05/11] block: fix vvfat error path for enable_write_target Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 07/11] block/raw: add bdrv_co_write_zeroes Stefan Hajnoczi
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Peter Lieven, Stefan Hajnoczi
From: Peter Lieven <pl@kamp.de>
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block.c | 27 +++++++++++++++++++--------
include/block/block.h | 2 ++
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/block.c b/block.c
index b560241..b05e2d6 100644
--- a/block.c
+++ b/block.c
@@ -2162,6 +2162,7 @@ typedef struct RwCo {
QEMUIOVector *qiov;
bool is_write;
int ret;
+ BdrvRequestFlags flags;
} RwCo;
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@@ -2170,10 +2171,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
if (!rwco->is_write) {
rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
} else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
}
}
@@ -2181,7 +2184,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
* Process a vectored synchronous request using coroutines
*/
static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, bool is_write)
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
@@ -2191,6 +2195,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
.qiov = qiov,
.is_write = is_write,
.ret = NOT_DONE,
+ .flags = flags,
};
assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
@@ -2222,7 +2227,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
* Process a synchronous request using coroutines
*/
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
+ int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
struct iovec iov = {
@@ -2231,14 +2236,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
};
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+ return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@@ -2264,12 +2269,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+ return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
{
- return bdrv_rwv_co(bs, sector_num, qiov, true);
+ return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+ BDRV_REQ_ZERO_WRITE);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
diff --git a/include/block/block.h b/include/block/block.h
index b6b9014..742fce5 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -157,6 +157,8 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
--
1.8.1.4
* [Qemu-devel] [PATCH 07/11] block/raw: add bdrv_co_write_zeroes
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (5 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 06/11] block: add bdrv_write_zeroes() Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 08/11] block-migration: efficiently encode zero blocks Stefan Hajnoczi
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Peter Lieven, Stefan Hajnoczi
From: Peter Lieven <pl@kamp.de>
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/raw.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/block/raw.c b/block/raw.c
index ce10422..8c81de9 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -42,6 +42,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
+static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
+}
+
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@@ -128,6 +135,7 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
+ .bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
--
1.8.1.4
* [Qemu-devel] [PATCH 08/11] block-migration: efficiently encode zero blocks
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (6 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 07/11] block/raw: add bdrv_co_write_zeroes Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 09/11] cpus: Let vm_stop[_force_state]() always flush block devices Stefan Hajnoczi
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Peter Lieven, Stefan Hajnoczi
From: Peter Lieven <pl@kamp.de>
This patch adds an efficient encoding for zero blocks by adding a new
flag that marks a block as completely zero. Additionally,
bdrv_write_zeroes() is used at the destination to write these zeroes
efficiently. Depending on the implementation, this avoids fully
provisioning the destination target.
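To make the wire format concrete, a hedged sketch of how the receiver
interprets the 64-bit header word; the constants are the ones defined in this
patch, the helper itself is illustrative:

#include <stdint.h>
#include <stdbool.h>

#define BDRV_SECTOR_BITS          9
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_ZERO_BLOCK   0x08

/* The sender packs (sector << BDRV_SECTOR_BITS) | flags into one be64.
 * Returns true when no 1 MB payload follows and the destination should
 * call bdrv_write_zeroes() for this chunk instead of bdrv_write(). */
static bool header_is_zero_block(uint64_t word, int64_t *sector)
{
    uint64_t flags = word & ((1u << BDRV_SECTOR_BITS) - 1);

    *sector = (int64_t)(word >> BDRV_SECTOR_BITS);
    return (flags & BLK_MIG_FLAG_DEVICE_BLOCK) &&
           (flags & BLK_MIG_FLAG_ZERO_BLOCK);
}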
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block-migration.c | 32 ++++++++++++++++++++++++++------
include/migration/migration.h | 1 +
migration.c | 9 +++++++++
qapi-schema.json | 8 +++++++-
4 files changed, 43 insertions(+), 7 deletions(-)
diff --git a/block-migration.c b/block-migration.c
index 2fd7699..f803f20 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -29,6 +29,7 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
+#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536
@@ -80,6 +81,7 @@ typedef struct BlkMigState {
int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
+ bool zero_blocks;
/* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
@@ -114,16 +116,30 @@ static void blk_mig_unlock(void)
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
+ uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
+
+ if (block_mig_state.zero_blocks &&
+ buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+ flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+ }
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
- | BLK_MIG_FLAG_DEVICE_BLOCK);
+ | flags);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration */
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ qemu_fflush(f);
+ return;
+ }
+
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
@@ -344,6 +360,7 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0;
+ block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL);
}
@@ -762,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
- buf = g_malloc(BLOCK_SIZE);
-
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- ret = bdrv_write(bs, addr, buf, nr_sectors);
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ } else {
+ buf = g_malloc(BLOCK_SIZE);
+ qemu_get_buffer(f, buf, BLOCK_SIZE);
+ ret = bdrv_write(bs, addr, buf, nr_sectors);
+ g_free(buf);
+ }
- g_free(buf);
if (ret < 0) {
return ret;
}
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bc9fde0..701709a 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -124,6 +124,7 @@ void migrate_add_blocker(Error *reason);
void migrate_del_blocker(Error *reason);
bool migrate_rdma_pin_all(void);
+bool migrate_zero_blocks(void);
bool migrate_auto_converge(void);
diff --git a/migration.c b/migration.c
index 9f5a423..a9c0421 100644
--- a/migration.c
+++ b/migration.c
@@ -493,6 +493,15 @@ bool migrate_auto_converge(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}
+bool migrate_zero_blocks(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index 8d33d52..592bb9c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -613,10 +613,16 @@
# Disabled by default. Experimental: may (or may not) be renamed after
# further testing is complete. (since 1.6)
#
+# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
+# essentially saves 1MB of zeroes per block on the wire. Enabling requires
+# source and target VM to support this feature. To enable it is sufficient
+# to enable the capability on the source VM. The feature is disabled by
+# default. (since 1.6)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
- 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
+ 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
##
# @MigrationCapabilityStatus
--
1.8.1.4
* [Qemu-devel] [PATCH 09/11] cpus: Let vm_stop[_force_state]() always flush block devices
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (7 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 08/11] block-migration: efficiently encode zero blocks Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 10/11] block: fix bdrv_read_unthrottled() Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 11/11] block/raw: add .bdrv_get_info Stefan Hajnoczi
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi
From: Kevin Wolf <kwolf@redhat.com>
Even if the VM is already stopped, we cannot assume that all data has
already been successfully flushed to disk. The flush during the previous
vm_stop() could have failed.
Run bdrv_flush_all() unconditionally so that we get an error each time
a block device isn't really flushed.
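A hedged sketch of the caller-side effect (vm_stop_force_state() and
RUN_STATE_FINISH_MIGRATE are existing QEMU names; the wrapper and its error
handling are illustrative):

#include "sysemu/sysemu.h"      /* vm_stop_force_state(), RunState */

/* With this change a second vm_stop_force_state() on an already stopped VM
 * re-runs bdrv_flush_all(), so a flush failure from the earlier stop is no
 * longer silently turned into success. */
static int freeze_guest_for_migration(void)
{
    int ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);

    if (ret < 0) {
        /* some block device could not be flushed; abort the migration */
        return ret;
    }
    return 0;
}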
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
cpus.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/cpus.c b/cpus.c
index 8062cdd..2509eb5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -443,11 +443,12 @@ static int do_vm_stop(RunState state)
pause_all_vcpus();
runstate_set(state);
vm_state_notify(0, state);
- bdrv_drain_all();
- ret = bdrv_flush_all();
monitor_protocol_event(QEVENT_STOP, NULL);
}
+ bdrv_drain_all();
+ ret = bdrv_flush_all();
+
return ret;
}
@@ -1126,7 +1127,9 @@ int vm_stop_force_state(RunState state)
return vm_stop(state);
} else {
runstate_set(state);
- return 0;
+ /* Make sure to return an error if the flush in a previous vm_stop()
+ * failed. */
+ return bdrv_flush_all();
}
}
--
1.8.1.4
* [Qemu-devel] [PATCH 10/11] block: fix bdrv_read_unthrottled()
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (8 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 09/11] cpus: Let vm_stop[_force_state]() always flush block devices Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
2013-07-19 8:38 ` [Qemu-devel] [PATCH 11/11] block/raw: add .bdrv_get_info Stefan Hajnoczi
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Peter Lieven, Stefan Hajnoczi
From: Peter Lieven <pl@kamp.de>
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block.c b/block.c
index b05e2d6..6cd39fa 100644
--- a/block.c
+++ b/block.c
@@ -2255,7 +2255,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
enabled = bs->io_limits_enabled;
bs->io_limits_enabled = false;
- ret = bdrv_read(bs, 0, buf, 1);
+ ret = bdrv_read(bs, sector_num, buf, nb_sectors);
bs->io_limits_enabled = enabled;
return ret;
}
--
1.8.1.4
* [Qemu-devel] [PATCH 11/11] block/raw: add .bdrv_get_info
2013-07-19 8:38 [Qemu-devel] [PULL for-1.6 00/11] Block patches Stefan Hajnoczi
` (9 preceding siblings ...)
2013-07-19 8:38 ` [Qemu-devel] [PATCH 10/11] block: fix bdrv_read_unthrottled() Stefan Hajnoczi
@ 2013-07-19 8:38 ` Stefan Hajnoczi
10 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19 8:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Peter Lieven, Stefan Hajnoczi
From: Peter Lieven <pl@kamp.de>
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/raw.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/block/raw.c b/block/raw.c
index 8c81de9..f1682d4 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -121,6 +121,11 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file);
}
+static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_get_info(bs->file, bdi);
+}
+
static BlockDriver bdrv_raw = {
.format_name = "raw",
@@ -140,6 +145,7 @@ static BlockDriver bdrv_raw = {
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
+ .bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,
--
1.8.1.4