* [Qemu-devel] [PATCH 0/2] file-posix: Make truncate/preallocate asynchronous
@ 2018-06-21 17:06 Kevin Wolf
2018-06-21 17:06 ` [Qemu-devel] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn Kevin Wolf
2018-06-21 17:06 ` [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous Kevin Wolf
0 siblings, 2 replies; 9+ messages in thread
From: Kevin Wolf @ 2018-06-21 17:06 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, mreitz, jcody, qemu-devel
This fixes the problem that blockdev-create on a local file blocks the
main loop despite being a background job. This was caused by file-posix
preallocating the image with blocking syscalls rather than moving this
to the thread pool and yielding the coroutine meanwhile.
Kevin Wolf (2):
block: Convert .bdrv_truncate callback to coroutine_fn
file-posix: Make .bdrv_co_truncate asynchronous
include/block/block.h | 4 +
include/block/block_int.h | 4 +-
include/block/raw-aio.h | 4 +-
block.c | 49 +++++++-
block/copy-on-read.c | 8 +-
block/crypto.c | 9 +-
block/file-posix.c | 277 ++++++++++++++++++++++++++--------------------
block/file-win32.c | 6 +-
block/gluster.c | 14 ++-
block/iscsi.c | 8 +-
block/nfs.c | 7 +-
block/qcow2.c | 14 +--
block/qed.c | 8 +-
block/raw-format.c | 8 +-
block/rbd.c | 8 +-
block/sheepdog.c | 12 +-
block/ssh.c | 6 +-
17 files changed, 267 insertions(+), 179 deletions(-)
--
2.13.6
^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn
2018-06-21 17:06 [Qemu-devel] [PATCH 0/2] file-posix: Make truncate/preallocate asynchronous Kevin Wolf
@ 2018-06-21 17:06 ` Kevin Wolf
2018-06-25 9:02 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-06-21 17:06 ` [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous Kevin Wolf
1 sibling, 1 reply; 9+ messages in thread
From: Kevin Wolf @ 2018-06-21 17:06 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, mreitz, jcody, qemu-devel
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/block.h | 4 ++++
include/block/block_int.h | 4 ++--
block.c | 49 ++++++++++++++++++++++++++++++++++++++++++-----
block/copy-on-read.c | 8 ++++----
block/crypto.c | 9 +++++----
block/file-posix.c | 12 ++++++------
block/file-win32.c | 6 +++---
block/gluster.c | 14 ++++++++------
block/iscsi.c | 8 ++++----
block/nfs.c | 7 ++++---
block/qcow2.c | 14 +++++++-------
block/qed.c | 8 +++++---
block/raw-format.c | 8 ++++----
block/rbd.c | 8 +++++---
block/sheepdog.c | 12 ++++++------
block/ssh.c | 6 +++---
16 files changed, 114 insertions(+), 63 deletions(-)
diff --git a/include/block/block.h b/include/block/block.h
index b1d6fdb97a..42e59ff585 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -300,8 +300,12 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
+ PreallocMode prealloc, Error **errp);
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
Error **errp);
+
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 74646ed722..c653ee663a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -289,8 +289,8 @@ struct BlockDriver {
* bdrv_parse_filename.
*/
const char *protocol_name;
- int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp);
+ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp);
int64_t (*bdrv_getlength)(BlockDriverState *bs);
bool has_variable_length;
diff --git a/block.c b/block.c
index 1b8147c1b3..39b7127945 100644
--- a/block.c
+++ b/block.c
@@ -3788,8 +3788,8 @@ exit:
/**
* Truncate file to 'offset' bytes (needed only for file protocols)
*/
-int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
- Error **errp)
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
@@ -3807,9 +3807,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
return -EINVAL;
}
- if (!drv->bdrv_truncate) {
+ if (!drv->bdrv_co_truncate) {
if (bs->file && drv->is_filter) {
- return bdrv_truncate(bs->file, offset, prealloc, errp);
+ return bdrv_co_truncate(bs->file, offset, prealloc, errp);
}
error_setg(errp, "Image format driver does not support resize");
return -ENOTSUP;
@@ -3821,7 +3821,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
assert(!(bs->open_flags & BDRV_O_INACTIVE));
- ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
+ ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp);
if (ret < 0) {
return ret;
}
@@ -3837,6 +3837,45 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
return ret;
}
+typedef struct TruncateCo {
+ BdrvChild *child;
+ int64_t offset;
+ PreallocMode prealloc;
+ Error **errp;
+ int ret;
+} TruncateCo;
+
+static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
+{
+ TruncateCo *tco = opaque;
+ tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc,
+ tco->errp);
+}
+
+int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
+ Error **errp)
+{
+ Coroutine *co;
+ TruncateCo tco = {
+ .child = child,
+ .offset = offset,
+ .prealloc = prealloc,
+ .errp = errp,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_truncate_co_entry(&tco);
+ } else {
+ co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
+ qemu_coroutine_enter(co);
+ BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
+ }
+
+ return tco.ret;
+}
+
/**
* Length of a allocated file in bytes. Sparse files are counted by actual
* allocated space. Return < 0 if error or unknown.
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 6a97208888..1dcdaeed69 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -80,10 +80,10 @@ static int64_t cor_getlength(BlockDriverState *bs)
}
-static int cor_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
- return bdrv_truncate(bs->file, offset, prealloc, errp);
+ return bdrv_co_truncate(bs->file, offset, prealloc, errp);
}
@@ -147,7 +147,7 @@ BlockDriver bdrv_copy_on_read = {
.bdrv_child_perm = cor_child_perm,
.bdrv_getlength = cor_getlength,
- .bdrv_truncate = cor_truncate,
+ .bdrv_co_truncate = cor_co_truncate,
.bdrv_co_preadv = cor_co_preadv,
.bdrv_co_pwritev = cor_co_pwritev,
diff --git a/block/crypto.c b/block/crypto.c
index 82091c5f70..1b89ff129e 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -357,8 +357,9 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
return ret;
}
-static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn
+block_crypto_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
uint64_t payload_offset =
@@ -371,7 +372,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
offset += payload_offset;
- return bdrv_truncate(bs->file, offset, prealloc, errp);
+ return bdrv_co_truncate(bs->file, offset, prealloc, errp);
}
static void block_crypto_close(BlockDriverState *bs)
@@ -700,7 +701,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_create = block_crypto_co_create_luks,
.bdrv_co_create_opts = block_crypto_co_create_opts_luks,
- .bdrv_truncate = block_crypto_truncate,
+ .bdrv_co_truncate = block_crypto_co_truncate,
.create_opts = &block_crypto_create_opts_luks,
.bdrv_reopen_prepare = block_crypto_reopen_prepare,
diff --git a/block/file-posix.c b/block/file-posix.c
index 07bb061fe4..6223a8bccc 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1856,8 +1856,8 @@ out:
return result;
}
-static int raw_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
struct stat st;
@@ -2602,7 +2602,7 @@ BlockDriver bdrv_file = {
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_truncate = raw_truncate,
+ .bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
@@ -3082,7 +3082,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_truncate = raw_truncate,
+ .bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
@@ -3204,7 +3204,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_truncate = raw_truncate,
+ .bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_get_allocated_file_size
@@ -3334,7 +3334,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_truncate = raw_truncate,
+ .bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_get_allocated_file_size
diff --git a/block/file-win32.c b/block/file-win32.c
index 3c67db4336..0411fe80fd 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -467,8 +467,8 @@ static void raw_close(BlockDriverState *bs)
}
}
-static int raw_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
LONG low, high;
@@ -640,7 +640,7 @@ BlockDriver bdrv_file = {
.bdrv_aio_pwritev = raw_aio_pwritev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_truncate = raw_truncate,
+ .bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
diff --git a/block/gluster.c b/block/gluster.c
index b5fe7f3e87..a4e1c8ecd8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1177,8 +1177,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
return acb.ret;
}
-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs,
+ int64_t offset,
+ PreallocMode prealloc,
+ Error **errp)
{
BDRVGlusterState *s = bs->opaque;
return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp);
@@ -1499,7 +1501,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
- .bdrv_truncate = qemu_gluster_truncate,
+ .bdrv_co_truncate = qemu_gluster_co_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
@@ -1528,7 +1530,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
- .bdrv_truncate = qemu_gluster_truncate,
+ .bdrv_co_truncate = qemu_gluster_co_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
@@ -1557,7 +1559,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
- .bdrv_truncate = qemu_gluster_truncate,
+ .bdrv_co_truncate = qemu_gluster_co_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
@@ -1592,7 +1594,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
- .bdrv_truncate = qemu_gluster_truncate,
+ .bdrv_co_truncate = qemu_gluster_co_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
diff --git a/block/iscsi.c b/block/iscsi.c
index 9f00fb47a5..bc84b14e20 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2085,8 +2085,8 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
}
}
-static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
Error *local_err = NULL;
@@ -2431,7 +2431,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
- .bdrv_truncate = iscsi_truncate,
+ .bdrv_co_truncate = iscsi_co_truncate,
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_block_status = iscsi_co_block_status,
@@ -2468,7 +2468,7 @@ static BlockDriver bdrv_iser = {
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
- .bdrv_truncate = iscsi_truncate,
+ .bdrv_co_truncate = iscsi_co_truncate,
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_block_status = iscsi_co_block_status,
diff --git a/block/nfs.c b/block/nfs.c
index 743ca0450e..eab1a2c408 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -743,8 +743,9 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
return (task.ret < 0 ? task.ret : st.st_blocks * 512);
}
-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn
+nfs_file_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
NFSClient *client = bs->opaque;
int ret;
@@ -873,7 +874,7 @@ static BlockDriver bdrv_nfs = {
.bdrv_has_zero_init = nfs_has_zero_init,
.bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
- .bdrv_truncate = nfs_file_truncate,
+ .bdrv_co_truncate = nfs_file_co_truncate,
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
diff --git a/block/qcow2.c b/block/qcow2.c
index 945132f692..b0b21889ac 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3437,8 +3437,8 @@ fail:
return ret;
}
-static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t old_length;
@@ -3520,8 +3520,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
if ((last_cluster + 1) * s->cluster_size < old_file_size) {
Error *local_err = NULL;
- bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
- PREALLOC_MODE_OFF, &local_err);
+ bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
+ PREALLOC_MODE_OFF, &local_err);
if (local_err) {
warn_reportf_err(local_err,
"Failed to truncate the tail of the image: ");
@@ -3605,7 +3605,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
/* Allocate the data area */
new_file_size = allocation_start +
nb_new_data_clusters * s->cluster_size;
- ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp);
+ ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp);
if (ret < 0) {
error_prepend(errp, "Failed to resize underlying file: ");
qcow2_free_clusters(bs, allocation_start,
@@ -3692,7 +3692,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
if (cluster_offset < 0) {
return cluster_offset;
}
- return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
+ return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
}
if (offset_into_cluster(s, offset)) {
@@ -4697,7 +4697,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
- .bdrv_truncate = qcow2_truncate,
+ .bdrv_co_truncate = qcow2_co_truncate,
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/qed.c b/block/qed.c
index 2363814538..689ea9d4d5 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1467,8 +1467,10 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
QED_AIOCB_WRITE | QED_AIOCB_ZERO);
}
-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
+ int64_t offset,
+ PreallocMode prealloc,
+ Error **errp)
{
BDRVQEDState *s = bs->opaque;
uint64_t old_image_size;
@@ -1678,7 +1680,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_readv = bdrv_qed_co_readv,
.bdrv_co_writev = bdrv_qed_co_writev,
.bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
- .bdrv_truncate = bdrv_qed_truncate,
+ .bdrv_co_truncate = bdrv_qed_co_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_refresh_limits = bdrv_qed_refresh_limits,
diff --git a/block/raw-format.c b/block/raw-format.c
index f2e468df6f..b78da564d4 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -366,8 +366,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
-static int raw_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
@@ -383,7 +383,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset,
s->size = offset;
offset += s->offset;
- return bdrv_truncate(bs->file, offset, prealloc, errp);
+ return bdrv_co_truncate(bs->file, offset, prealloc, errp);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
@@ -545,7 +545,7 @@ BlockDriver bdrv_raw = {
.bdrv_co_block_status = &raw_co_block_status,
.bdrv_co_copy_range_from = &raw_co_copy_range_from,
.bdrv_co_copy_range_to = &raw_co_copy_range_to,
- .bdrv_truncate = &raw_truncate,
+ .bdrv_co_truncate = &raw_co_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_measure = &raw_measure,
diff --git a/block/rbd.c b/block/rbd.c
index f2c6965418..ca8e5bbace 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -990,8 +990,10 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
return info.size;
}
-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
+ int64_t offset,
+ PreallocMode prealloc,
+ Error **errp)
{
BDRVRBDState *s = bs->opaque;
int r;
@@ -1184,7 +1186,7 @@ static BlockDriver bdrv_rbd = {
.bdrv_get_info = qemu_rbd_getinfo,
.create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
- .bdrv_truncate = qemu_rbd_truncate,
+ .bdrv_co_truncate = qemu_rbd_co_truncate,
.protocol_name = "rbd",
.bdrv_aio_preadv = qemu_rbd_aio_preadv,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 665b1763eb..b229a664d9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2292,8 +2292,8 @@ static int64_t sd_getlength(BlockDriverState *bs)
return s->inode.vdi_size;
}
-static int sd_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVSheepdogState *s = bs->opaque;
int ret, fd;
@@ -2609,7 +2609,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
assert(!flags);
if (offset > s->inode.vdi_size) {
- ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
+ ret = sd_co_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
if (ret < 0) {
return ret;
}
@@ -3231,7 +3231,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
- .bdrv_truncate = sd_truncate,
+ .bdrv_co_truncate = sd_co_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
@@ -3268,7 +3268,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
- .bdrv_truncate = sd_truncate,
+ .bdrv_co_truncate = sd_co_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
@@ -3305,7 +3305,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
- .bdrv_truncate = sd_truncate,
+ .bdrv_co_truncate = sd_co_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
diff --git a/block/ssh.c b/block/ssh.c
index da7bbf73e2..7fbc27abdf 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -1243,8 +1243,8 @@ static int64_t ssh_getlength(BlockDriverState *bs)
return length;
}
-static int ssh_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
BDRVSSHState *s = bs->opaque;
@@ -1279,7 +1279,7 @@ static BlockDriver bdrv_ssh = {
.bdrv_co_readv = ssh_co_readv,
.bdrv_co_writev = ssh_co_writev,
.bdrv_getlength = ssh_getlength,
- .bdrv_truncate = ssh_truncate,
+ .bdrv_co_truncate = ssh_co_truncate,
.bdrv_co_flush_to_disk = ssh_co_flush,
.create_opts = &ssh_create_opts,
};
--
2.13.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous
2018-06-21 17:06 [Qemu-devel] [PATCH 0/2] file-posix: Make truncate/preallocate asynchronous Kevin Wolf
2018-06-21 17:06 ` [Qemu-devel] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn Kevin Wolf
@ 2018-06-21 17:06 ` Kevin Wolf
2018-06-25 8:56 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-06-25 14:24 ` [Qemu-devel] " Max Reitz
1 sibling, 2 replies; 9+ messages in thread
From: Kevin Wolf @ 2018-06-21 17:06 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, mreitz, jcody, qemu-devel
This moves the code to resize an image file to the thread pool to avoid
blocking.
Creating large images with preallocation with blockdev-create is now
actually a background job instead of blocking the monitor (and most
other things) until the preallocation has completed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/raw-aio.h | 4 +-
block/file-posix.c | 265 +++++++++++++++++++++++++++---------------------
2 files changed, 153 insertions(+), 116 deletions(-)
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index 0e717fd475..e496ae31b2 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -26,6 +26,7 @@
#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_WRITE_ZEROES 0x0020
#define QEMU_AIO_COPY_RANGE 0x0040
+#define QEMU_AIO_TRUNCATE 0x0080
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ | \
QEMU_AIO_WRITE | \
@@ -33,7 +34,8 @@
QEMU_AIO_FLUSH | \
QEMU_AIO_DISCARD | \
QEMU_AIO_WRITE_ZEROES | \
- QEMU_AIO_COPY_RANGE)
+ QEMU_AIO_COPY_RANGE | \
+ QEMU_AIO_TRUNCATE)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000
diff --git a/block/file-posix.c b/block/file-posix.c
index 6223a8bccc..fa918f2bdb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -188,8 +188,16 @@ typedef struct RawPosixAIOData {
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
off_t aio_offset;
int aio_type;
- int aio_fd2;
- off_t aio_offset2;
+ union {
+ struct {
+ int aio_fd2;
+ off_t aio_offset2;
+ };
+ struct {
+ PreallocMode prealloc;
+ Error **errp;
+ };
+ };
} RawPosixAIOData;
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1533,6 +1541,121 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
return ret;
}
+static int handle_aiocb_truncate(RawPosixAIOData *aiocb)
+{
+ int result = 0;
+ int64_t current_length = 0;
+ char *buf = NULL;
+ struct stat st;
+ int fd = aiocb->aio_fildes;
+ int64_t offset = aiocb->aio_offset;
+ Error **errp = aiocb->errp;
+
+ if (fstat(fd, &st) < 0) {
+ result = -errno;
+ error_setg_errno(errp, -result, "Could not stat file");
+ return result;
+ }
+
+ current_length = st.st_size;
+ if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) {
+ error_setg(errp, "Cannot use preallocation for shrinking files");
+ return -ENOTSUP;
+ }
+
+ switch (aiocb->prealloc) {
+#ifdef CONFIG_POSIX_FALLOCATE
+ case PREALLOC_MODE_FALLOC:
+ /*
+ * Truncating before posix_fallocate() makes it about twice slower on
+ * file systems that do not support fallocate(), trying to check if a
+ * block is allocated before allocating it, so don't do that here.
+ */
+ if (offset != current_length) {
+ result = -posix_fallocate(fd, current_length, offset - current_length);
+ if (result != 0) {
+ /* posix_fallocate() doesn't set errno. */
+ error_setg_errno(errp, -result,
+ "Could not preallocate new data");
+ }
+ } else {
+ result = 0;
+ }
+ goto out;
+#endif
+ case PREALLOC_MODE_FULL:
+ {
+ int64_t num = 0, left = offset - current_length;
+ off_t seek_result;
+
+ /*
+ * Knowing the final size from the beginning could allow the file
+ * system driver to do less allocations and possibly avoid
+ * fragmentation of the file.
+ */
+ if (ftruncate(fd, offset) != 0) {
+ result = -errno;
+ error_setg_errno(errp, -result, "Could not resize file");
+ goto out;
+ }
+
+ buf = g_malloc0(65536);
+
+ seek_result = lseek(fd, current_length, SEEK_SET);
+ if (seek_result < 0) {
+ result = -errno;
+ error_setg_errno(errp, -result,
+ "Failed to seek to the old end of file");
+ goto out;
+ }
+
+ while (left > 0) {
+ num = MIN(left, 65536);
+ result = write(fd, buf, num);
+ if (result < 0) {
+ result = -errno;
+ error_setg_errno(errp, -result,
+ "Could not write zeros for preallocation");
+ goto out;
+ }
+ left -= result;
+ }
+ if (result >= 0) {
+ result = fsync(fd);
+ if (result < 0) {
+ result = -errno;
+ error_setg_errno(errp, -result,
+ "Could not flush file to disk");
+ goto out;
+ }
+ }
+ goto out;
+ }
+ case PREALLOC_MODE_OFF:
+ if (ftruncate(fd, offset) != 0) {
+ result = -errno;
+ error_setg_errno(errp, -result, "Could not resize file");
+ }
+ return result;
+ default:
+ result = -ENOTSUP;
+ error_setg(errp, "Unsupported preallocation mode: %s",
+ PreallocMode_str(aiocb->prealloc));
+ return result;
+ }
+
+out:
+ if (result < 0) {
+ if (ftruncate(fd, current_length) < 0) {
+ error_report("Failed to restore old file length: %s",
+ strerror(errno));
+ }
+ }
+
+ g_free(buf);
+ return result;
+}
+
static int aio_worker(void *arg)
{
RawPosixAIOData *aiocb = arg;
@@ -1576,6 +1699,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_COPY_RANGE:
ret = handle_aiocb_copy_range(aiocb);
break;
+ case QEMU_AIO_TRUNCATE:
+ ret = handle_aiocb_truncate(aiocb);
+ break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -1743,117 +1869,25 @@ static void raw_close(BlockDriverState *bs)
*
* Returns: 0 on success, -errno on failure.
*/
-static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
- Error **errp)
+static int coroutine_fn
+raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
+ PreallocMode prealloc, Error **errp)
{
- int result = 0;
- int64_t current_length = 0;
- char *buf = NULL;
- struct stat st;
-
- if (fstat(fd, &st) < 0) {
- result = -errno;
- error_setg_errno(errp, -result, "Could not stat file");
- return result;
- }
-
- current_length = st.st_size;
- if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
- error_setg(errp, "Cannot use preallocation for shrinking files");
- return -ENOTSUP;
- }
-
- switch (prealloc) {
-#ifdef CONFIG_POSIX_FALLOCATE
- case PREALLOC_MODE_FALLOC:
- /*
- * Truncating before posix_fallocate() makes it about twice slower on
- * file systems that do not support fallocate(), trying to check if a
- * block is allocated before allocating it, so don't do that here.
- */
- if (offset != current_length) {
- result = -posix_fallocate(fd, current_length, offset - current_length);
- if (result != 0) {
- /* posix_fallocate() doesn't set errno. */
- error_setg_errno(errp, -result,
- "Could not preallocate new data");
- }
- } else {
- result = 0;
- }
- goto out;
-#endif
- case PREALLOC_MODE_FULL:
- {
- int64_t num = 0, left = offset - current_length;
- off_t seek_result;
-
- /*
- * Knowing the final size from the beginning could allow the file
- * system driver to do less allocations and possibly avoid
- * fragmentation of the file.
- */
- if (ftruncate(fd, offset) != 0) {
- result = -errno;
- error_setg_errno(errp, -result, "Could not resize file");
- goto out;
- }
-
- buf = g_malloc0(65536);
-
- seek_result = lseek(fd, current_length, SEEK_SET);
- if (seek_result < 0) {
- result = -errno;
- error_setg_errno(errp, -result,
- "Failed to seek to the old end of file");
- goto out;
- }
-
- while (left > 0) {
- num = MIN(left, 65536);
- result = write(fd, buf, num);
- if (result < 0) {
- result = -errno;
- error_setg_errno(errp, -result,
- "Could not write zeros for preallocation");
- goto out;
- }
- left -= result;
- }
- if (result >= 0) {
- result = fsync(fd);
- if (result < 0) {
- result = -errno;
- error_setg_errno(errp, -result,
- "Could not flush file to disk");
- goto out;
- }
- }
- goto out;
- }
- case PREALLOC_MODE_OFF:
- if (ftruncate(fd, offset) != 0) {
- result = -errno;
- error_setg_errno(errp, -result, "Could not resize file");
- }
- return result;
- default:
- result = -ENOTSUP;
- error_setg(errp, "Unsupported preallocation mode: %s",
- PreallocMode_str(prealloc));
- return result;
- }
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
+ ThreadPool *pool;
-out:
- if (result < 0) {
- if (ftruncate(fd, current_length) < 0) {
- error_report("Failed to restore old file length: %s",
- strerror(errno));
- }
- }
+ *acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = fd,
+ .aio_type = QEMU_AIO_TRUNCATE,
+ .aio_offset = offset,
+ .prealloc = prealloc,
+ .errp = errp,
+ };
- g_free(buf);
- return result;
+ /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
+ pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ return thread_pool_submit_co(pool, aio_worker, acb);
}
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -1870,7 +1904,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
}
if (S_ISREG(st.st_mode)) {
- return raw_regular_truncate(s->fd, offset, prealloc, errp);
+ return raw_regular_truncate(bs, s->fd, offset, prealloc, errp);
}
if (prealloc != PREALLOC_MODE_OFF) {
@@ -2072,7 +2106,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
-static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
+static int coroutine_fn
+raw_co_create(BlockdevCreateOptions *options, Error **errp)
{
BlockdevCreateOptionsFile *file_opts;
int fd;
@@ -2124,7 +2159,7 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
/* Clear the file by truncating it to 0 */
- result = raw_regular_truncate(fd, 0, PREALLOC_MODE_OFF, errp);
+ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp);
if (result < 0) {
goto out_close;
}
@@ -2146,8 +2181,8 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
/* Resize and potentially preallocate the file to the desired
* final size */
- result = raw_regular_truncate(fd, file_opts->size, file_opts->preallocation,
- errp);
+ result = raw_regular_truncate(NULL, fd, file_opts->size,
+ file_opts->preallocation, errp);
if (result < 0) {
goto out_close;
}
--
2.13.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [Qemu-block] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous
2018-06-21 17:06 ` [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous Kevin Wolf
@ 2018-06-25 8:56 ` Stefan Hajnoczi
2018-06-25 9:18 ` Kevin Wolf
2018-06-25 14:24 ` [Qemu-devel] " Max Reitz
1 sibling, 1 reply; 9+ messages in thread
From: Stefan Hajnoczi @ 2018-06-25 8:56 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, qemu-devel, mreitz
[-- Attachment #1: Type: text/plain, Size: 935 bytes --]
On Thu, Jun 21, 2018 at 07:06:57PM +0200, Kevin Wolf wrote:
> + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
> + ThreadPool *pool;
>
> -out:
> - if (result < 0) {
> - if (ftruncate(fd, current_length) < 0) {
> - error_report("Failed to restore old file length: %s",
> - strerror(errno));
> - }
> - }
> + *acb = (RawPosixAIOData) {
> + .bs = bs,
> + .aio_fildes = fd,
> + .aio_type = QEMU_AIO_TRUNCATE,
> + .aio_offset = offset,
> + .prealloc = prealloc,
> + .errp = errp,
> + };
>
> - g_free(buf);
> - return result;
> + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
> + pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
> + return thread_pool_submit_co(pool, aio_worker, acb);
Where is acb freed?
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [Qemu-block] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn
2018-06-21 17:06 ` [Qemu-devel] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn Kevin Wolf
@ 2018-06-25 9:02 ` Stefan Hajnoczi
2018-06-25 9:51 ` Kevin Wolf
0 siblings, 1 reply; 9+ messages in thread
From: Stefan Hajnoczi @ 2018-06-25 9:02 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, qemu-devel, mreitz
[-- Attachment #1: Type: text/plain, Size: 621 bytes --]
On Thu, Jun 21, 2018 at 07:06:56PM +0200, Kevin Wolf wrote:
> bdrv_truncate() is an operation that can block (even for a quite long
> time, depending on the PreallocMode) in I/O paths that shouldn't block.
> Convert it to a coroutine_fn so that we have the infrastructure for
> drivers to make their .bdrv_co_truncate implementation asynchronous.
block/commit.c:commit_run() invokes blk_truncate() outside of a drained
region. I haven't looked for other instances, but this patch opens the
door for races with other I/O requests. Are you sure it's safe to make
this asynchronous without request serialization?
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [Qemu-block] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous
2018-06-25 8:56 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
@ 2018-06-25 9:18 ` Kevin Wolf
0 siblings, 0 replies; 9+ messages in thread
From: Kevin Wolf @ 2018-06-25 9:18 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-block, qemu-devel, mreitz
[-- Attachment #1: Type: text/plain, Size: 1270 bytes --]
Am 25.06.2018 um 10:56 hat Stefan Hajnoczi geschrieben:
> On Thu, Jun 21, 2018 at 07:06:57PM +0200, Kevin Wolf wrote:
> > + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
> > + ThreadPool *pool;
> >
> > -out:
> > - if (result < 0) {
> > - if (ftruncate(fd, current_length) < 0) {
> > - error_report("Failed to restore old file length: %s",
> > - strerror(errno));
> > - }
> > - }
> > + *acb = (RawPosixAIOData) {
> > + .bs = bs,
> > + .aio_fildes = fd,
> > + .aio_type = QEMU_AIO_TRUNCATE,
> > + .aio_offset = offset,
> > + .prealloc = prealloc,
> > + .errp = errp,
> > + };
> >
> > - g_free(buf);
> > - return result;
> > + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
> > + pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
> > + return thread_pool_submit_co(pool, aio_worker, acb);
>
> Where is acb freed?
At the end of aio_worker(). This is why I even need to malloc it here.
Once my other patch is in that removes the callback-based paio_submit(),
we could allocate the acb on the stack for all callers instead.
Kevin
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [Qemu-block] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn
2018-06-25 9:02 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
@ 2018-06-25 9:51 ` Kevin Wolf
2018-06-25 14:15 ` Max Reitz
0 siblings, 1 reply; 9+ messages in thread
From: Kevin Wolf @ 2018-06-25 9:51 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-block, qemu-devel, mreitz
[-- Attachment #1: Type: text/plain, Size: 1960 bytes --]
Am 25.06.2018 um 11:02 hat Stefan Hajnoczi geschrieben:
> On Thu, Jun 21, 2018 at 07:06:56PM +0200, Kevin Wolf wrote:
> > bdrv_truncate() is an operation that can block (even for a quite long
> > time, depending on the PreallocMode) in I/O paths that shouldn't block.
> > Convert it to a coroutine_fn so that we have the infrastructure for
> > drivers to make their .bdrv_co_truncate implementation asynchronous.
>
> block/commit.c:commit_run() invokes blk_truncate() outside of a drained
> region. I haven't looked for other instances, but this patch opens the
> door for races with other I/O requests. Are you sure it's safe to make
> this asynchronous without request serialization?
After trying to explain why it's correct, I start to think that you're
right at least in one case. The new thing after this patch is that the
truncate operation isn't atomic any more. What this means depends on the
block driver:
* file-posix/win32: I think this one is okay. The truncate
implementation doesn't depend in any way on the content of the image.
Preallocation could be critical (in that it could overwrite
concurrently issued write requests), but the BDS size is only adjusted
after the driver callback has returned, so there can't be a concurrent
write request.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* gluster, iscsi, nfs, rbd, ssh: Won't yield even after this series, so
trivially okay.
* qcow2: This one is where you're right, it needs to hold s->lock so
that the metadata modifications become safe.
* qed: Does a single header update, should be fine without locking.
* sheepdog: Doesn't yield until it starts preallocation. For
preallocation, the same reasoning as for file-posix applies.
So, if I got this right, the only thing to change is holding s->lock in
qcow2_co_truncate().
Do you agree?
Kevin
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [Qemu-block] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn
2018-06-25 9:51 ` Kevin Wolf
@ 2018-06-25 14:15 ` Max Reitz
0 siblings, 0 replies; 9+ messages in thread
From: Max Reitz @ 2018-06-25 14:15 UTC (permalink / raw)
To: Kevin Wolf, Stefan Hajnoczi; +Cc: qemu-block, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 2883 bytes --]
On 2018-06-25 11:51, Kevin Wolf wrote:
> Am 25.06.2018 um 11:02 hat Stefan Hajnoczi geschrieben:
>> On Thu, Jun 21, 2018 at 07:06:56PM +0200, Kevin Wolf wrote:
>>> bdrv_truncate() is an operation that can block (even for a quite long
>>> time, depending on the PreallocMode) in I/O paths that shouldn't block.
>>> Convert it to a coroutine_fn so that we have the infrastructure for
>>> drivers to make their .bdrv_co_truncate implementation asynchronous.
parallels_co_check() is a remaining coroutine_fn that calls
bdrv_truncate(), maybe it should call bdrv_co_truncate() now.
(And vhdx_allocate_block() probably should be a coroutine_fn, but that's
something for another time, I suppose.)
>> block/commit.c:commit_run() invokes blk_truncate() outside of a drained
>> region. I haven't looked for other instances, but this patch opens the
>> door for races with other I/O requests. Are you sure it's safe to make
>> this asynchronous without request serialization?
>
> After trying to explain why it's correct, I start to think that you're
> right at least in one case. The new thing after this patch is that the
> truncate operation isn't atomic any more. What this means depends on the
> block driver:
>
> * file-posix/win32: I think this one is okay. The truncate
> implementation doesn't depend in any way on the content of the image.
> Preallocation could be critical (in that it could overwrite
> concurrently issued write requests), but the BDS size is only adjusted
> after the driver callback has returned, so there can't be a concurrent
> write request.
Except when the BDS is growable (which every BDS is). qcow2 generally
writes beyond the EOF, so I suppose a concurrent preallocating
truncation may result in a race.
We don't have preallocating truncation over QMP yet, so technically this
is not an issue, but with your jobs series and this series, we may well
have it soon.
> * copy-on-read, crypto, raw-format: Essentially just filter drivers that
> pass the request to a child node, no problem.
>
> * gluster, iscsi, nfs, rbd, ssh: Won't yield even after this series, so
> trivially okay.
>
> * qcow2: This one is where you're right, it needs to hold s->lock so
> that the metadata modifications become safe.
>
> * qed: Does a single header update, should be fine without locking.
>
> * sheepdog: Doesn't yield until it starts preallocation. For
> preallocation, the same reasoning as for file-posix applies.
>
> So, if I got this right, the only thing to change is holding s->lock in
> qcow2_co_truncate().
By holding the lock, we would probably solve the race for qcow2, but
probably not for raw-format.
(And I think qcow2 and raw are the only formats that support
preallocation on truncation. (Well, raw doesn't really support it, but
it does allow it.))
Max
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous
2018-06-21 17:06 ` [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous Kevin Wolf
2018-06-25 8:56 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
@ 2018-06-25 14:24 ` Max Reitz
1 sibling, 0 replies; 9+ messages in thread
From: Max Reitz @ 2018-06-25 14:24 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: jcody, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1500 bytes --]
On 2018-06-21 19:06, Kevin Wolf wrote:
> This moves the code to resize an image file to the thread pool to avoid
> blocking.
>
> Creating large images with preallocation with blockdev-create is now
> actually a background job instead of blocking the monitor (and most
> other things) until the preallocation has completed.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> include/block/raw-aio.h | 4 +-
> block/file-posix.c | 265 +++++++++++++++++++++++++++---------------------
> 2 files changed, 153 insertions(+), 116 deletions(-)
[...]
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 6223a8bccc..fa918f2bdb 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -188,8 +188,16 @@ typedef struct RawPosixAIOData {
> #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
> off_t aio_offset;
> int aio_type;
> - int aio_fd2;
> - off_t aio_offset2;
> + union {
> + struct {
> + int aio_fd2;
> + off_t aio_offset2;
> + };
> + struct {
> + PreallocMode prealloc;
> + Error **errp;
> + };
> + };
Comments might help (that the first struct is for *_COPY_RANGE and the
second one is for *_TRUNCATE).
But in general (without having actual knowledge of file-posix's AIO pool):
Reviewed-by: Max Reitz <mreitz@redhat.com>
> } RawPosixAIOData;
>
> #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2018-06-25 14:25 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-06-21 17:06 [Qemu-devel] [PATCH 0/2] file-posix: Make truncate/preallocate asynchronous Kevin Wolf
2018-06-21 17:06 ` [Qemu-devel] [PATCH 1/2] block: Convert .bdrv_truncate callback to coroutine_fn Kevin Wolf
2018-06-25 9:02 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-06-25 9:51 ` Kevin Wolf
2018-06-25 14:15 ` Max Reitz
2018-06-21 17:06 ` [Qemu-devel] [PATCH 2/2] file-posix: Make .bdrv_co_truncate asynchronous Kevin Wolf
2018-06-25 8:56 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-06-25 9:18 ` Kevin Wolf
2018-06-25 14:24 ` [Qemu-devel] " Max Reitz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).