* [PULL 01/18] blockdev: refactor transaction to use Transaction API
From: Kevin Wolf @ 2023-05-17 16:50 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
We are going to add more block-graph-modifying transaction actions, and
the block-graph-modifying functions are already based on the
Transaction API.

Next, we will need to update permissions separately after several
graph-modifying actions, and that becomes simple with the help of the
Transaction API.

So, as a first step, transform the existing actions into new-style
transaction actions.
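
For readers not familiar with the Transaction API, here is a condensed
sketch of the shape this patch converts the actions to. It is
illustrative only: the callback layout and the tran_new()/tran_add()/
tran_commit()/tran_abort() calls mirror what the diff below uses, while
the example_* names are placeholders.

    typedef struct ExampleState {
        bool done;
    } ExampleState;

    static void example_commit(void *opaque) { /* make change permanent */ }
    static void example_abort(void *opaque)  { /* roll change back */ }
    static void example_clean(void *opaque)  { g_free(opaque); }

    TransactionActionDrv example_drv = {
        .commit = example_commit,
        .abort = example_abort,
        .clean = example_clean,
    };

    static void example_action(Transaction *tran, Error **errp)
    {
        ExampleState *s = g_new0(ExampleState, 1);

        tran_add(tran, &example_drv, s);  /* register rollback/cleanup */
        /* ... do the prepare-phase work, setting errp on failure ... */
    }

    static void example_caller(Error **errp)
    {
        Transaction *tran = tran_new();
        Error *local_err = NULL;

        example_action(tran, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            tran_abort(tran);   /* runs .abort(), then .clean() */
        } else {
            tran_commit(tran);  /* runs .commit(), then .clean() */
        }
    }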
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230510150624.310640-2-vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 309 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 178 insertions(+), 131 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index d141ca7a2d..3c72d40f51 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1210,10 +1210,7 @@ typedef struct BlkActionState BlkActionState;
*/
typedef struct BlkActionOps {
size_t instance_size;
- void (*prepare)(BlkActionState *common, Error **errp);
- void (*commit)(BlkActionState *common);
- void (*abort)(BlkActionState *common);
- void (*clean)(BlkActionState *common);
+ void (*action)(BlkActionState *common, Transaction *tran, Error **errp);
} BlkActionOps;
/**
@@ -1245,6 +1242,12 @@ typedef struct InternalSnapshotState {
bool created;
} InternalSnapshotState;
+static void internal_snapshot_abort(void *opaque);
+static void internal_snapshot_clean(void *opaque);
+TransactionActionDrv internal_snapshot_drv = {
+ .abort = internal_snapshot_abort,
+ .clean = internal_snapshot_clean,
+};
static int action_check_completion_mode(BlkActionState *s, Error **errp)
{
@@ -1259,8 +1262,8 @@ static int action_check_completion_mode(BlkActionState *s, Error **errp)
return 0;
}
-static void internal_snapshot_prepare(BlkActionState *common,
- Error **errp)
+static void internal_snapshot_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
Error *local_err = NULL;
const char *device;
@@ -1279,6 +1282,8 @@ static void internal_snapshot_prepare(BlkActionState *common,
internal = common->action->u.blockdev_snapshot_internal_sync.data;
state = DO_UPCAST(InternalSnapshotState, common, common);
+ tran_add(tran, &internal_snapshot_drv, state);
+
/* 1. parse input */
device = internal->device;
name = internal->name;
@@ -1363,10 +1368,9 @@ out:
aio_context_release(aio_context);
}
-static void internal_snapshot_abort(BlkActionState *common)
+static void internal_snapshot_abort(void *opaque)
{
- InternalSnapshotState *state =
- DO_UPCAST(InternalSnapshotState, common, common);
+ InternalSnapshotState *state = opaque;
BlockDriverState *bs = state->bs;
QEMUSnapshotInfo *sn = &state->sn;
AioContext *aio_context;
@@ -1390,10 +1394,9 @@ static void internal_snapshot_abort(BlkActionState *common)
aio_context_release(aio_context);
}
-static void internal_snapshot_clean(BlkActionState *common)
+static void internal_snapshot_clean(void *opaque)
{
- InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
- common, common);
+ g_autofree InternalSnapshotState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1416,8 +1419,17 @@ typedef struct ExternalSnapshotState {
bool overlay_appended;
} ExternalSnapshotState;
-static void external_snapshot_prepare(BlkActionState *common,
- Error **errp)
+static void external_snapshot_commit(void *opaque);
+static void external_snapshot_abort(void *opaque);
+static void external_snapshot_clean(void *opaque);
+TransactionActionDrv external_snapshot_drv = {
+ .commit = external_snapshot_commit,
+ .abort = external_snapshot_abort,
+ .clean = external_snapshot_clean,
+};
+
+static void external_snapshot_action(BlkActionState *common, Transaction *tran,
+ Error **errp)
{
int ret;
int flags = 0;
@@ -1436,6 +1448,8 @@ static void external_snapshot_prepare(BlkActionState *common,
AioContext *aio_context;
uint64_t perm, shared;
+ tran_add(tran, &external_snapshot_drv, state);
+
/* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
* purpose but a different set of parameters */
switch (action->type) {
@@ -1588,10 +1602,9 @@ out:
aio_context_release(aio_context);
}
-static void external_snapshot_commit(BlkActionState *common)
+static void external_snapshot_commit(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ ExternalSnapshotState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->old_bs);
@@ -1607,10 +1620,9 @@ static void external_snapshot_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void external_snapshot_abort(BlkActionState *common)
+static void external_snapshot_abort(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ ExternalSnapshotState *state = opaque;
if (state->new_bs) {
if (state->overlay_appended) {
AioContext *aio_context;
@@ -1650,10 +1662,9 @@ static void external_snapshot_abort(BlkActionState *common)
}
}
-static void external_snapshot_clean(BlkActionState *common)
+static void external_snapshot_clean(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ g_autofree ExternalSnapshotState *state = opaque;
AioContext *aio_context;
if (!state->old_bs) {
@@ -1681,7 +1692,17 @@ static BlockJob *do_backup_common(BackupCommon *backup,
AioContext *aio_context,
JobTxn *txn, Error **errp);
-static void drive_backup_prepare(BlkActionState *common, Error **errp)
+static void drive_backup_commit(void *opaque);
+static void drive_backup_abort(void *opaque);
+static void drive_backup_clean(void *opaque);
+TransactionActionDrv drive_backup_drv = {
+ .commit = drive_backup_commit,
+ .abort = drive_backup_abort,
+ .clean = drive_backup_clean,
+};
+
+static void drive_backup_action(BlkActionState *common, Transaction *tran,
+ Error **errp)
{
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
DriveBackup *backup;
@@ -1698,6 +1719,8 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
bool set_backing_hd = false;
int ret;
+ tran_add(tran, &drive_backup_drv, state);
+
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
backup = common->action->u.drive_backup.data;
@@ -1828,9 +1851,9 @@ out:
aio_context_release(aio_context);
}
-static void drive_backup_commit(BlkActionState *common)
+static void drive_backup_commit(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ DriveBackupState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->bs);
@@ -1842,18 +1865,18 @@ static void drive_backup_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void drive_backup_abort(BlkActionState *common)
+static void drive_backup_abort(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ DriveBackupState *state = opaque;
if (state->job) {
job_cancel_sync(&state->job->job, true);
}
}
-static void drive_backup_clean(BlkActionState *common)
+static void drive_backup_clean(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ g_autofree DriveBackupState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1874,7 +1897,17 @@ typedef struct BlockdevBackupState {
BlockJob *job;
} BlockdevBackupState;
-static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
+static void blockdev_backup_commit(void *opaque);
+static void blockdev_backup_abort(void *opaque);
+static void blockdev_backup_clean(void *opaque);
+TransactionActionDrv blockdev_backup_drv = {
+ .commit = blockdev_backup_commit,
+ .abort = blockdev_backup_abort,
+ .clean = blockdev_backup_clean,
+};
+
+static void blockdev_backup_action(BlkActionState *common, Transaction *tran,
+ Error **errp)
{
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
BlockdevBackup *backup;
@@ -1884,6 +1917,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
AioContext *old_context;
int ret;
+ tran_add(tran, &blockdev_backup_drv, state);
+
assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
backup = common->action->u.blockdev_backup.data;
@@ -1922,9 +1957,9 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
aio_context_release(aio_context);
}
-static void blockdev_backup_commit(BlkActionState *common)
+static void blockdev_backup_commit(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ BlockdevBackupState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->bs);
@@ -1936,18 +1971,18 @@ static void blockdev_backup_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void blockdev_backup_abort(BlkActionState *common)
+static void blockdev_backup_abort(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ BlockdevBackupState *state = opaque;
if (state->job) {
job_cancel_sync(&state->job->job, true);
}
}
-static void blockdev_backup_clean(BlkActionState *common)
+static void blockdev_backup_clean(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ g_autofree BlockdevBackupState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1971,14 +2006,22 @@ typedef struct BlockDirtyBitmapState {
bool was_enabled;
} BlockDirtyBitmapState;
-static void block_dirty_bitmap_add_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_add_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_add_drv = {
+ .abort = block_dirty_bitmap_add_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_add_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
Error *local_err = NULL;
BlockDirtyBitmapAdd *action;
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
+ tran_add(tran, &block_dirty_bitmap_add_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -1998,13 +2041,12 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
}
}
-static void block_dirty_bitmap_add_abort(BlkActionState *common)
+static void block_dirty_bitmap_add_abort(void *opaque)
{
BlockDirtyBitmapAdd *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
- action = common->action->u.block_dirty_bitmap_add.data;
+ action = state->common.action->u.block_dirty_bitmap_add.data;
/* Should not be able to fail: IF the bitmap was added via .prepare(),
* then the node reference and bitmap name must have been valid.
*/
@@ -2013,13 +2055,23 @@ static void block_dirty_bitmap_add_abort(BlkActionState *common)
}
}
-static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_restore(void *opaque);
+static void block_dirty_bitmap_free_backup(void *opaque);
+TransactionActionDrv block_dirty_bitmap_clear_drv = {
+ .abort = block_dirty_bitmap_restore,
+ .commit = block_dirty_bitmap_free_backup,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_clear_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
BlockDirtyBitmap *action;
+ tran_add(tran, &block_dirty_bitmap_clear_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -2040,31 +2092,37 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
}
-static void block_dirty_bitmap_restore(BlkActionState *common)
+static void block_dirty_bitmap_restore(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->backup) {
bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
}
}
-static void block_dirty_bitmap_free_backup(BlkActionState *common)
+static void block_dirty_bitmap_free_backup(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
hbitmap_free(state->backup);
}
-static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_enable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_enable_drv = {
+ .abort = block_dirty_bitmap_enable_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_enable_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
BlockDirtyBitmap *action;
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
+ tran_add(tran, &block_dirty_bitmap_enable_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -2086,23 +2144,30 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
bdrv_enable_dirty_bitmap(state->bitmap);
}
-static void block_dirty_bitmap_enable_abort(BlkActionState *common)
+static void block_dirty_bitmap_enable_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (!state->was_enabled) {
bdrv_disable_dirty_bitmap(state->bitmap);
}
}
-static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_disable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_disable_drv = {
+ .abort = block_dirty_bitmap_disable_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_disable_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
BlockDirtyBitmap *action;
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
+ tran_add(tran, &block_dirty_bitmap_disable_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -2124,23 +2189,30 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
bdrv_disable_dirty_bitmap(state->bitmap);
}
-static void block_dirty_bitmap_disable_abort(BlkActionState *common)
+static void block_dirty_bitmap_disable_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->was_enabled) {
bdrv_enable_dirty_bitmap(state->bitmap);
}
}
-static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
- Error **errp)
+TransactionActionDrv block_dirty_bitmap_merge_drv = {
+ .commit = block_dirty_bitmap_free_backup,
+ .abort = block_dirty_bitmap_restore,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_merge_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
BlockDirtyBitmapMerge *action;
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
+ tran_add(tran, &block_dirty_bitmap_merge_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -2152,13 +2224,23 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
errp);
}
-static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_remove_commit(void *opaque);
+static void block_dirty_bitmap_remove_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_remove_drv = {
+ .commit = block_dirty_bitmap_remove_commit,
+ .abort = block_dirty_bitmap_remove_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_remove_action(BlkActionState *common,
+ Transaction *tran, Error **errp)
{
BlockDirtyBitmap *action;
BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
common, common);
+ tran_add(tran, &block_dirty_bitmap_remove_drv, state);
+
if (action_check_completion_mode(common, errp) < 0) {
return;
}
@@ -2173,10 +2255,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
}
}
-static void block_dirty_bitmap_remove_abort(BlkActionState *common)
+static void block_dirty_bitmap_remove_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->bitmap) {
bdrv_dirty_bitmap_skip_store(state->bitmap, false);
@@ -2184,21 +2265,28 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common)
}
}
-static void block_dirty_bitmap_remove_commit(BlkActionState *common)
+static void block_dirty_bitmap_remove_commit(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
bdrv_dirty_bitmap_set_busy(state->bitmap, false);
bdrv_release_dirty_bitmap(state->bitmap);
}
-static void abort_prepare(BlkActionState *common, Error **errp)
+static void abort_commit(void *opaque);
+TransactionActionDrv abort_drv = {
+ .commit = abort_commit,
+ .clean = g_free,
+};
+
+static void abort_action(BlkActionState *common, Transaction *tran,
+ Error **errp)
{
+ tran_add(tran, &abort_drv, common);
error_setg(errp, "Transaction aborted using Abort action");
}
-static void abort_commit(BlkActionState *common)
+static void abort_commit(void *opaque)
{
g_assert_not_reached(); /* this action never succeeds */
}
@@ -2206,75 +2294,51 @@ static void abort_commit(BlkActionState *common)
static const BlkActionOps actions[] = {
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
.instance_size = sizeof(ExternalSnapshotState),
- .prepare = external_snapshot_prepare,
- .commit = external_snapshot_commit,
- .abort = external_snapshot_abort,
- .clean = external_snapshot_clean,
+ .action = external_snapshot_action,
},
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
.instance_size = sizeof(ExternalSnapshotState),
- .prepare = external_snapshot_prepare,
- .commit = external_snapshot_commit,
- .abort = external_snapshot_abort,
- .clean = external_snapshot_clean,
+ .action = external_snapshot_action,
},
[TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
.instance_size = sizeof(DriveBackupState),
- .prepare = drive_backup_prepare,
- .commit = drive_backup_commit,
- .abort = drive_backup_abort,
- .clean = drive_backup_clean,
+ .action = drive_backup_action,
},
[TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
.instance_size = sizeof(BlockdevBackupState),
- .prepare = blockdev_backup_prepare,
- .commit = blockdev_backup_commit,
- .abort = blockdev_backup_abort,
- .clean = blockdev_backup_clean,
+ .action = blockdev_backup_action,
},
[TRANSACTION_ACTION_KIND_ABORT] = {
.instance_size = sizeof(BlkActionState),
- .prepare = abort_prepare,
- .commit = abort_commit,
+ .action = abort_action,
},
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
.instance_size = sizeof(InternalSnapshotState),
- .prepare = internal_snapshot_prepare,
- .abort = internal_snapshot_abort,
- .clean = internal_snapshot_clean,
+ .action = internal_snapshot_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_add_prepare,
- .abort = block_dirty_bitmap_add_abort,
+ .action = block_dirty_bitmap_add_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_clear_prepare,
- .commit = block_dirty_bitmap_free_backup,
- .abort = block_dirty_bitmap_restore,
+ .action = block_dirty_bitmap_clear_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_enable_prepare,
- .abort = block_dirty_bitmap_enable_abort,
+ .action = block_dirty_bitmap_enable_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_disable_prepare,
- .abort = block_dirty_bitmap_disable_abort,
+ .action = block_dirty_bitmap_disable_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_merge_prepare,
- .commit = block_dirty_bitmap_free_backup,
- .abort = block_dirty_bitmap_restore,
+ .action = block_dirty_bitmap_merge_action,
},
[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
.instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_remove_prepare,
- .commit = block_dirty_bitmap_remove_commit,
- .abort = block_dirty_bitmap_remove_abort,
+ .action = block_dirty_bitmap_remove_action,
},
/* Where are transactions for MIRROR, COMMIT and STREAM?
* Although these blockjobs use transaction callbacks like the backup job,
@@ -2316,14 +2380,11 @@ void qmp_transaction(TransactionActionList *dev_list,
TransactionActionList *dev_entry = dev_list;
bool has_props = !!props;
JobTxn *block_job_txn = NULL;
- BlkActionState *state, *next;
Error *local_err = NULL;
+ Transaction *tran = tran_new();
GLOBAL_STATE_CODE();
- QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
- QTAILQ_INIT(&snap_bdrv_states);
-
/* Does this transaction get canceled as a group on failure?
* If not, we don't really need to make a JobTxn.
*/
@@ -2339,6 +2400,7 @@ void qmp_transaction(TransactionActionList *dev_list,
while (NULL != dev_entry) {
TransactionAction *dev_info = NULL;
const BlkActionOps *ops;
+ BlkActionState *state;
dev_info = dev_entry->value;
dev_entry = dev_entry->next;
@@ -2353,38 +2415,23 @@ void qmp_transaction(TransactionActionList *dev_list,
state->action = dev_info;
state->block_job_txn = block_job_txn;
state->txn_props = props;
- QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
- state->ops->prepare(state, &local_err);
+ state->ops->action(state, tran, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto delete_and_fail;
}
}
- QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
- if (state->ops->commit) {
- state->ops->commit(state);
- }
- }
+ tran_commit(tran);
/* success */
goto exit;
delete_and_fail:
/* failure, and it is all-or-none; roll back all operations */
- QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
- if (state->ops->abort) {
- state->ops->abort(state);
- }
- }
+ tran_abort(tran);
exit:
- QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
- if (state->ops->clean) {
- state->ops->clean(state);
- }
- g_free(state);
- }
if (!has_props) {
qapi_free_TransactionProperties(props);
}
--
2.40.1
* [PULL 02/18] blockdev: transactions: rename some things
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
In qmp_transaction(), dev_list is not an obvious name for the list of
actions. In the QAPI schema this argument is called "actions", so let's
follow the common practice of using the same argument names in the QAPI
schema and in the code.

For the same reason, rename props to properties.

Next, rename the global map of actions so that it does not conflict
with the new name of the function argument.

Also rename the dev_entry loop variable to match the new name of the
list.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510150624.310640-3-vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 3c72d40f51..75e313f403 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2291,7 +2291,7 @@ static void abort_commit(void *opaque)
g_assert_not_reached(); /* this action never succeeds */
}
-static const BlkActionOps actions[] = {
+static const BlkActionOps actions_map[] = {
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
.instance_size = sizeof(ExternalSnapshotState),
.action = external_snapshot_action,
@@ -2373,12 +2373,12 @@ static TransactionProperties *get_transaction_properties(
*
* Always run under BQL.
*/
-void qmp_transaction(TransactionActionList *dev_list,
- struct TransactionProperties *props,
+void qmp_transaction(TransactionActionList *actions,
+ struct TransactionProperties *properties,
Error **errp)
{
- TransactionActionList *dev_entry = dev_list;
- bool has_props = !!props;
+ TransactionActionList *act = actions;
+ bool has_properties = !!properties;
JobTxn *block_job_txn = NULL;
Error *local_err = NULL;
Transaction *tran = tran_new();
@@ -2388,8 +2388,8 @@ void qmp_transaction(TransactionActionList *dev_list,
/* Does this transaction get canceled as a group on failure?
* If not, we don't really need to make a JobTxn.
*/
- props = get_transaction_properties(props);
- if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ properties = get_transaction_properties(properties);
+ if (properties->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
block_job_txn = job_txn_new();
}
@@ -2397,24 +2397,24 @@ void qmp_transaction(TransactionActionList *dev_list,
bdrv_drain_all();
/* We don't do anything in this loop that commits us to the operations */
- while (NULL != dev_entry) {
+ while (NULL != act) {
TransactionAction *dev_info = NULL;
const BlkActionOps *ops;
BlkActionState *state;
- dev_info = dev_entry->value;
- dev_entry = dev_entry->next;
+ dev_info = act->value;
+ act = act->next;
- assert(dev_info->type < ARRAY_SIZE(actions));
+ assert(dev_info->type < ARRAY_SIZE(actions_map));
- ops = &actions[dev_info->type];
+ ops = &actions_map[dev_info->type];
assert(ops->instance_size > 0);
state = g_malloc0(ops->instance_size);
state->ops = ops;
state->action = dev_info;
state->block_job_txn = block_job_txn;
- state->txn_props = props;
+ state->txn_props = properties;
state->ops->action(state, tran, &local_err);
if (local_err) {
@@ -2432,8 +2432,8 @@ delete_and_fail:
/* failure, and it is all-or-none; roll back all operations */
tran_abort(tran);
exit:
- if (!has_props) {
- qapi_free_TransactionProperties(props);
+ if (!has_properties) {
+ qapi_free_TransactionProperties(properties);
}
job_txn_unref(block_job_txn);
}
--
2.40.1
* [PULL 03/18] blockdev: qmp_transaction: refactor loop to classic for
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510150624.310640-4-vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 75e313f403..dd0e98bbe1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2377,7 +2377,7 @@ void qmp_transaction(TransactionActionList *actions,
struct TransactionProperties *properties,
Error **errp)
{
- TransactionActionList *act = actions;
+ TransactionActionList *act;
bool has_properties = !!properties;
JobTxn *block_job_txn = NULL;
Error *local_err = NULL;
@@ -2397,14 +2397,11 @@ void qmp_transaction(TransactionActionList *actions,
bdrv_drain_all();
/* We don't do anything in this loop that commits us to the operations */
- while (NULL != act) {
- TransactionAction *dev_info = NULL;
+ for (act = actions; act; act = act->next) {
+ TransactionAction *dev_info = act->value;
const BlkActionOps *ops;
BlkActionState *state;
- dev_info = act->value;
- act = act->next;
-
assert(dev_info->type < ARRAY_SIZE(actions_map));
ops = &actions_map[dev_info->type];
--
2.40.1
* [PULL 04/18] blockdev: transaction: refactor handling transaction properties
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Only the backup actions support GROUPED completion mode. Make this
logic explicit in qmp_transaction(), and avoid passing the extra
transaction properties to each action.
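
For reference, the user-visible feature being restricted here is the
completion-mode transaction property. A grouped transaction looks
roughly like this on the QMP level (an illustrative sketch; the device
and target names are made up):

    { "execute": "transaction",
      "arguments": {
        "properties": { "completion-mode": "grouped" },
        "actions": [
          { "type": "drive-backup",
            "data": { "device": "drive0", "sync": "full",
                      "target": "/tmp/backup0.img" } },
          { "type": "drive-backup",
            "data": { "device": "drive1", "sync": "full",
                      "target": "/tmp/backup1.img" } }
        ]
      }
    }

With "grouped" completion mode the backup jobs are put into one JobTxn,
so if one of them fails the others are cancelled as well; any
non-backup action in such a transaction is now rejected up front.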
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230510150624.310640-5-vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 96 +++++++++++++-----------------------------------------
1 file changed, 22 insertions(+), 74 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index dd0e98bbe1..a2ebaa5afc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1230,7 +1230,6 @@ struct BlkActionState {
TransactionAction *action;
const BlkActionOps *ops;
JobTxn *block_job_txn;
- TransactionProperties *txn_props;
QTAILQ_ENTRY(BlkActionState) entry;
};
@@ -1249,19 +1248,6 @@ TransactionActionDrv internal_snapshot_drv = {
.clean = internal_snapshot_clean,
};
-static int action_check_completion_mode(BlkActionState *s, Error **errp)
-{
- if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
- error_setg(errp,
- "Action '%s' does not support Transaction property "
- "completion-mode = %s",
- TransactionActionKind_str(s->action->type),
- ActionCompletionMode_str(s->txn_props->completion_mode));
- return -1;
- }
- return 0;
-}
-
static void internal_snapshot_action(BlkActionState *common,
Transaction *tran, Error **errp)
{
@@ -1284,15 +1270,9 @@ static void internal_snapshot_action(BlkActionState *common,
tran_add(tran, &internal_snapshot_drv, state);
- /* 1. parse input */
device = internal->device;
name = internal->name;
- /* 2. check for validation */
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return;
@@ -1476,9 +1456,6 @@ static void external_snapshot_action(BlkActionState *common, Transaction *tran,
}
/* start processing */
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
state->old_bs = bdrv_lookup_bs(device, node_name, errp);
if (!state->old_bs) {
@@ -2022,10 +1999,6 @@ static void block_dirty_bitmap_add_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_add_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_add.data;
/* AIO context taken and released within qmp_block_dirty_bitmap_add */
qmp_block_dirty_bitmap_add(action->node, action->name,
@@ -2072,10 +2045,6 @@ static void block_dirty_bitmap_clear_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_clear_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_clear.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
@@ -2123,10 +2092,6 @@ static void block_dirty_bitmap_enable_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_enable_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_enable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
@@ -2168,10 +2133,6 @@ static void block_dirty_bitmap_disable_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_disable_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_disable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
@@ -2213,10 +2174,6 @@ static void block_dirty_bitmap_merge_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_merge_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_merge.data;
state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
@@ -2241,10 +2198,6 @@ static void block_dirty_bitmap_remove_action(BlkActionState *common,
tran_add(tran, &block_dirty_bitmap_remove_drv, state);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
action = common->action->u.block_dirty_bitmap_remove.data;
state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
@@ -2348,25 +2301,6 @@ static const BlkActionOps actions_map[] = {
*/
};
-/**
- * Allocate a TransactionProperties structure if necessary, and fill
- * that structure with desired defaults if they are unset.
- */
-static TransactionProperties *get_transaction_properties(
- TransactionProperties *props)
-{
- if (!props) {
- props = g_new0(TransactionProperties, 1);
- }
-
- if (!props->has_completion_mode) {
- props->has_completion_mode = true;
- props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
- }
-
- return props;
-}
-
/*
* 'Atomic' group operations. The operations are performed as a set, and if
* any fail then we roll back all operations in the group.
@@ -2378,24 +2312,42 @@ void qmp_transaction(TransactionActionList *actions,
Error **errp)
{
TransactionActionList *act;
- bool has_properties = !!properties;
JobTxn *block_job_txn = NULL;
Error *local_err = NULL;
- Transaction *tran = tran_new();
+ Transaction *tran;
+ ActionCompletionMode comp_mode =
+ properties ? properties->completion_mode :
+ ACTION_COMPLETION_MODE_INDIVIDUAL;
GLOBAL_STATE_CODE();
/* Does this transaction get canceled as a group on failure?
* If not, we don't really need to make a JobTxn.
*/
- properties = get_transaction_properties(properties);
- if (properties->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ for (act = actions; act; act = act->next) {
+ TransactionActionKind type = act->value->type;
+
+ if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP &&
+ type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP)
+ {
+ error_setg(errp,
+ "Action '%s' does not support transaction property "
+ "completion-mode = %s",
+ TransactionActionKind_str(type),
+ ActionCompletionMode_str(comp_mode));
+ return;
+ }
+ }
+
block_job_txn = job_txn_new();
}
/* drain all i/o before any operations */
bdrv_drain_all();
+ tran = tran_new();
+
/* We don't do anything in this loop that commits us to the operations */
for (act = actions; act; act = act->next) {
TransactionAction *dev_info = act->value;
@@ -2411,7 +2363,6 @@ void qmp_transaction(TransactionActionList *actions,
state->ops = ops;
state->action = dev_info;
state->block_job_txn = block_job_txn;
- state->txn_props = properties;
state->ops->action(state, tran, &local_err);
if (local_err) {
@@ -2429,9 +2380,6 @@ delete_and_fail:
/* failure, and it is all-or-none; roll back all operations */
tran_abort(tran);
exit:
- if (!has_properties) {
- qapi_free_TransactionProperties(properties);
- }
job_txn_unref(block_job_txn);
}
--
2.40.1
* [PULL 05/18] blockdev: use state.bitmap in block-dirty-bitmap-add action
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Other bitmap-related actions use the .bitmap pointer in their .abort
handler; let's do the same here:

1. It helps further refactoring: bitmap-add is the only bitmap action
   that still uses state.action in .abort.

2. It is safe: transaction actions rely on the fact that at .abort()
   time the state is the same as at the end of .prepare(), so that
   .abort() can precisely roll back the changes done by .prepare().

   The only way to remove the bitmap during the transaction would be
   the block-dirty-bitmap-remove action, but it postpones the actual
   removal to .commit(), so we are safe on any rollback path. (Note
   also that bitmap-remove is the only bitmap action that has a
   .commit() phase, apart from a simple g_free of the state in
   .clean().)

3. Other bitmap actions already behave this way: they keep the bitmap
   pointer for the duration of the transaction.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230510150624.310640-6-vsementsov@yandex-team.ru>
[kwolf: Also remove the now unused BlockDirtyBitmapState.prepared]
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index a2ebaa5afc..4bf15566b2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1979,7 +1979,6 @@ typedef struct BlockDirtyBitmapState {
BdrvDirtyBitmap *bitmap;
BlockDriverState *bs;
HBitmap *backup;
- bool prepared;
bool was_enabled;
} BlockDirtyBitmapState;
@@ -2008,7 +2007,8 @@ static void block_dirty_bitmap_add_action(BlkActionState *common,
&local_err);
if (!local_err) {
- state->prepared = true;
+ state->bitmap = block_dirty_bitmap_lookup(action->node, action->name,
+ NULL, &error_abort);
} else {
error_propagate(errp, local_err);
}
@@ -2016,15 +2016,10 @@ static void block_dirty_bitmap_add_action(BlkActionState *common,
static void block_dirty_bitmap_add_abort(void *opaque)
{
- BlockDirtyBitmapAdd *action;
BlockDirtyBitmapState *state = opaque;
- action = state->common.action->u.block_dirty_bitmap_add.data;
- /* Should not be able to fail: IF the bitmap was added via .prepare(),
- * then the node reference and bitmap name must have been valid.
- */
- if (state->prepared) {
- qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
+ if (state->bitmap) {
+ bdrv_release_dirty_bitmap(state->bitmap);
}
}
--
2.40.1
* [PULL 06/18] blockdev: qmp_transaction: drop extra generic layer
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Let's simplify things:

First, actions generally don't need access to the common BlkActionState
structure. The only exceptions are the backup actions, which need
block_job_txn.

Next, with the Transaction API it is more natural for each action to
allocate its own state structure instead of having a generic layer do
it.

So, do the following transformation (a condensed sketch of the
resulting shape follows this list):

1. Represent each action by a function that takes the corresponding
   QAPI structure as an argument.

2. Instead of the array-map marshaller, add a function that calls the
   corresponding action directly.

3. The BlkActionOps and BlkActionState structures become unused.
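
Condensed from the diff below, each action now has roughly this shape
(using one of the bitmap actions from the patch as the example):

    static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action,
                                                Transaction *tran,
                                                Error **errp)
    {
        BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);

        tran_add(tran, &block_dirty_bitmap_clear_drv, state);
        /* ... prepare-phase work using the typed QAPI struct ... */
    }

and the array-map marshaller is replaced by a plain switch in
transaction_action():

    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR:
        block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data,
                                        tran, errp);
        return;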
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230510150624.310640-7-vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 265 +++++++++++++++++------------------------------------
1 file changed, 85 insertions(+), 180 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 4bf15566b2..5d56b79df4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1188,54 +1188,8 @@ out_aio_context:
return NULL;
}
-/* New and old BlockDriverState structs for atomic group operations */
-
-typedef struct BlkActionState BlkActionState;
-
-/**
- * BlkActionOps:
- * Table of operations that define an Action.
- *
- * @instance_size: Size of state struct, in bytes.
- * @prepare: Prepare the work, must NOT be NULL.
- * @commit: Commit the changes, can be NULL.
- * @abort: Abort the changes on fail, can be NULL.
- * @clean: Clean up resources after all transaction actions have called
- * commit() or abort(). Can be NULL.
- *
- * Only prepare() may fail. In a single transaction, only one of commit() or
- * abort() will be called. clean() will always be called if it is present.
- *
- * Always run under BQL.
- */
-typedef struct BlkActionOps {
- size_t instance_size;
- void (*action)(BlkActionState *common, Transaction *tran, Error **errp);
-} BlkActionOps;
-
-/**
- * BlkActionState:
- * Describes one Action's state within a Transaction.
- *
- * @action: QAPI-defined enum identifying which Action to perform.
- * @ops: Table of ActionOps this Action can perform.
- * @block_job_txn: Transaction which this action belongs to.
- * @entry: List membership for all Actions in this Transaction.
- *
- * This structure must be arranged as first member in a subclassed type,
- * assuming that the compiler will also arrange it to the same offsets as the
- * base class.
- */
-struct BlkActionState {
- TransactionAction *action;
- const BlkActionOps *ops;
- JobTxn *block_job_txn;
- QTAILQ_ENTRY(BlkActionState) entry;
-};
-
/* internal snapshot private data */
typedef struct InternalSnapshotState {
- BlkActionState common;
BlockDriverState *bs;
QEMUSnapshotInfo sn;
bool created;
@@ -1248,7 +1202,7 @@ TransactionActionDrv internal_snapshot_drv = {
.clean = internal_snapshot_clean,
};
-static void internal_snapshot_action(BlkActionState *common,
+static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
Transaction *tran, Error **errp)
{
Error *local_err = NULL;
@@ -1258,16 +1212,10 @@ static void internal_snapshot_action(BlkActionState *common,
QEMUSnapshotInfo old_sn, *sn;
bool ret;
int64_t rt;
- BlockdevSnapshotInternal *internal;
- InternalSnapshotState *state;
+ InternalSnapshotState *state = g_new0(InternalSnapshotState, 1);
AioContext *aio_context;
int ret1;
- g_assert(common->action->type ==
- TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
- internal = common->action->u.blockdev_snapshot_internal_sync.data;
- state = DO_UPCAST(InternalSnapshotState, common, common);
-
tran_add(tran, &internal_snapshot_drv, state);
device = internal->device;
@@ -1393,7 +1341,6 @@ static void internal_snapshot_clean(void *opaque)
/* external snapshot private data */
typedef struct ExternalSnapshotState {
- BlkActionState common;
BlockDriverState *old_bs;
BlockDriverState *new_bs;
bool overlay_appended;
@@ -1408,8 +1355,8 @@ TransactionActionDrv external_snapshot_drv = {
.clean = external_snapshot_clean,
};
-static void external_snapshot_action(BlkActionState *common, Transaction *tran,
- Error **errp)
+static void external_snapshot_action(TransactionAction *action,
+ Transaction *tran, Error **errp)
{
int ret;
int flags = 0;
@@ -1422,9 +1369,7 @@ static void external_snapshot_action(BlkActionState *common, Transaction *tran,
const char *snapshot_ref;
/* File name of the new image (for 'blockdev-snapshot-sync') */
const char *new_image_file;
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
- TransactionAction *action = common->action;
+ ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
AioContext *aio_context;
uint64_t perm, shared;
@@ -1658,7 +1603,6 @@ static void external_snapshot_clean(void *opaque)
}
typedef struct DriveBackupState {
- BlkActionState common;
BlockDriverState *bs;
BlockJob *job;
} DriveBackupState;
@@ -1678,11 +1622,11 @@ TransactionActionDrv drive_backup_drv = {
.clean = drive_backup_clean,
};
-static void drive_backup_action(BlkActionState *common, Transaction *tran,
- Error **errp)
+static void drive_backup_action(DriveBackup *backup,
+ JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
- DriveBackup *backup;
+ DriveBackupState *state = g_new0(DriveBackupState, 1);
BlockDriverState *bs;
BlockDriverState *target_bs;
BlockDriverState *source = NULL;
@@ -1698,9 +1642,6 @@ static void drive_backup_action(BlkActionState *common, Transaction *tran,
tran_add(tran, &drive_backup_drv, state);
- assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
- backup = common->action->u.drive_backup.data;
-
if (!backup->has_mode) {
backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
@@ -1820,7 +1761,7 @@ static void drive_backup_action(BlkActionState *common, Transaction *tran,
state->job = do_backup_common(qapi_DriveBackup_base(backup),
bs, target_bs, aio_context,
- common->block_job_txn, errp);
+ block_job_txn, errp);
unref:
bdrv_unref(target_bs);
@@ -1869,7 +1810,6 @@ static void drive_backup_clean(void *opaque)
}
typedef struct BlockdevBackupState {
- BlkActionState common;
BlockDriverState *bs;
BlockJob *job;
} BlockdevBackupState;
@@ -1883,11 +1823,11 @@ TransactionActionDrv blockdev_backup_drv = {
.clean = blockdev_backup_clean,
};
-static void blockdev_backup_action(BlkActionState *common, Transaction *tran,
- Error **errp)
+static void blockdev_backup_action(BlockdevBackup *backup,
+ JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
- BlockdevBackup *backup;
+ BlockdevBackupState *state = g_new0(BlockdevBackupState, 1);
BlockDriverState *bs;
BlockDriverState *target_bs;
AioContext *aio_context;
@@ -1896,9 +1836,6 @@ static void blockdev_backup_action(BlkActionState *common, Transaction *tran,
tran_add(tran, &blockdev_backup_drv, state);
- assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
- backup = common->action->u.blockdev_backup.data;
-
bs = bdrv_lookup_bs(backup->device, backup->device, errp);
if (!bs) {
return;
@@ -1929,7 +1866,7 @@ static void blockdev_backup_action(BlkActionState *common, Transaction *tran,
state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
bs, target_bs, aio_context,
- common->block_job_txn, errp);
+ block_job_txn, errp);
aio_context_release(aio_context);
}
@@ -1975,7 +1912,6 @@ static void blockdev_backup_clean(void *opaque)
}
typedef struct BlockDirtyBitmapState {
- BlkActionState common;
BdrvDirtyBitmap *bitmap;
BlockDriverState *bs;
HBitmap *backup;
@@ -1988,17 +1924,14 @@ TransactionActionDrv block_dirty_bitmap_add_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_add_action(BlkActionState *common,
+static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action,
Transaction *tran, Error **errp)
{
Error *local_err = NULL;
- BlockDirtyBitmapAdd *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_add_drv, state);
- action = common->action->u.block_dirty_bitmap_add.data;
/* AIO context taken and released within qmp_block_dirty_bitmap_add */
qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
@@ -2031,16 +1964,13 @@ TransactionActionDrv block_dirty_bitmap_clear_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_clear_action(BlkActionState *common,
+static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action,
Transaction *tran, Error **errp)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
- BlockDirtyBitmap *action;
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_clear_drv, state);
- action = common->action->u.block_dirty_bitmap_clear.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
&state->bs,
@@ -2078,16 +2008,13 @@ TransactionActionDrv block_dirty_bitmap_enable_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_enable_action(BlkActionState *common,
+static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action,
Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_enable_drv, state);
- action = common->action->u.block_dirty_bitmap_enable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
NULL,
@@ -2119,16 +2046,13 @@ TransactionActionDrv block_dirty_bitmap_disable_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_disable_action(BlkActionState *common,
+static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action,
Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_disable_drv, state);
- action = common->action->u.block_dirty_bitmap_disable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
NULL,
@@ -2160,17 +2084,13 @@ TransactionActionDrv block_dirty_bitmap_merge_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_merge_action(BlkActionState *common,
+static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action,
Transaction *tran, Error **errp)
{
- BlockDirtyBitmapMerge *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_merge_drv, state);
- action = common->action->u.block_dirty_bitmap_merge.data;
-
state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
action->bitmaps, &state->backup,
errp);
@@ -2184,16 +2104,13 @@ TransactionActionDrv block_dirty_bitmap_remove_drv = {
.clean = g_free,
};
-static void block_dirty_bitmap_remove_action(BlkActionState *common,
+static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action,
Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
tran_add(tran, &block_dirty_bitmap_remove_drv, state);
- action = common->action->u.block_dirty_bitmap_remove.data;
state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
false, &state->bs, errp);
@@ -2224,13 +2141,11 @@ static void block_dirty_bitmap_remove_commit(void *opaque)
static void abort_commit(void *opaque);
TransactionActionDrv abort_drv = {
.commit = abort_commit,
- .clean = g_free,
};
-static void abort_action(BlkActionState *common, Transaction *tran,
- Error **errp)
+static void abort_action(Transaction *tran, Error **errp)
{
- tran_add(tran, &abort_drv, common);
+ tran_add(tran, &abort_drv, NULL);
error_setg(errp, "Transaction aborted using Abort action");
}
@@ -2239,62 +2154,66 @@ static void abort_commit(void *opaque)
g_assert_not_reached(); /* this action never succeeds */
}
-static const BlkActionOps actions_map[] = {
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
- .instance_size = sizeof(ExternalSnapshotState),
- .action = external_snapshot_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
- .instance_size = sizeof(ExternalSnapshotState),
- .action = external_snapshot_action,
- },
- [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
- .instance_size = sizeof(DriveBackupState),
- .action = drive_backup_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
- .instance_size = sizeof(BlockdevBackupState),
- .action = blockdev_backup_action,
- },
- [TRANSACTION_ACTION_KIND_ABORT] = {
- .instance_size = sizeof(BlkActionState),
- .action = abort_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
- .instance_size = sizeof(InternalSnapshotState),
- .action = internal_snapshot_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_add_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_clear_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_enable_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_disable_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_merge_action,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .action = block_dirty_bitmap_remove_action,
- },
- /* Where are transactions for MIRROR, COMMIT and STREAM?
+static void transaction_action(TransactionAction *act, JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
+{
+ switch (act->type) {
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+ external_snapshot_action(act, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_DRIVE_BACKUP:
+ drive_backup_action(act->u.drive_backup.data,
+ block_job_txn, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP:
+ blockdev_backup_action(act->u.blockdev_backup.data,
+ block_job_txn, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_ABORT:
+ abort_action(tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC:
+ internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD:
+ block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR:
+ block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE:
+ block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE:
+ block_dirty_bitmap_disable_action(
+ act->u.block_dirty_bitmap_disable.data, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE:
+ block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE:
+ block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data,
+ tran, errp);
+ return;
+ /*
+ * Where are transactions for MIRROR, COMMIT and STREAM?
* Although these blockjobs use transaction callbacks like the backup job,
* these jobs do not necessarily adhere to transaction semantics.
* These jobs may not fully undo all of their actions on abort, nor do they
* necessarily work in transactions with more than one job in them.
*/
-};
+ case TRANSACTION_ACTION_KIND__MAX:
+ default:
+ g_assert_not_reached();
+ };
+}
+
/*
* 'Atomic' group operations. The operations are performed as a set, and if
@@ -2345,21 +2264,7 @@ void qmp_transaction(TransactionActionList *actions,
/* We don't do anything in this loop that commits us to the operations */
for (act = actions; act; act = act->next) {
- TransactionAction *dev_info = act->value;
- const BlkActionOps *ops;
- BlkActionState *state;
-
- assert(dev_info->type < ARRAY_SIZE(actions_map));
-
- ops = &actions_map[dev_info->type];
- assert(ops->instance_size > 0);
-
- state = g_malloc0(ops->instance_size);
- state->ops = ops;
- state->action = dev_info;
- state->block_job_txn = block_job_txn;
-
- state->ops->action(state, tran, &local_err);
+ transaction_action(act->value, block_job_txn, tran, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto delete_and_fail;
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 07/18] docs/interop/qcow2.txt: fix description about "zlib" clusters
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (5 preceding siblings ...)
2023-05-17 16:51 ` [PULL 06/18] blockdev: qmp_transaction: drop extra generic layer Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 08/18] block: Call .bdrv_co_create(_opts) unlocked Kevin Wolf
` (10 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
"zlib" clusters are actually raw deflate (RFC1951) clusters without
zlib headers.
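As an illustration only (not part of the patch): a consumer reading such a cluster with the zlib library has to ask for raw-deflate mode, i.e. negative windowBits, because there is no zlib header or checksum to parse. A minimal sketch, assuming 'in'/'in_len' hold the stored compressed cluster and 'out'/'out_len' a cluster-sized output buffer:

    #include <zlib.h>
    #include <string.h>

    /* Decompress one qcow2 "zlib" (really: raw deflate) cluster. */
    static int decompress_cluster(const unsigned char *in, size_t in_len,
                                  unsigned char *out, size_t out_len)
    {
        z_stream strm;
        int ret;

        memset(&strm, 0, sizeof(strm));
        /* Negative windowBits: raw deflate, no zlib header or trailer */
        if (inflateInit2(&strm, -15) != Z_OK) {
            return -1;
        }

        strm.next_in = (unsigned char *)in;
        strm.avail_in = in_len;
        strm.next_out = out;
        strm.avail_out = out_len;

        ret = inflate(&strm, Z_FINISH);
        inflateEnd(&strm);

        /* Stored streams may be cut off once every output byte is
         * recoverable, so a filled output buffer with Z_BUF_ERROR is
         * commonly accepted as success too. */
        if (ret == Z_STREAM_END ||
            (ret == Z_BUF_ERROR && strm.avail_out == 0)) {
            return 0;
        }
        return -1;
    }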
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Message-Id: <168424874322.11954.1340942046351859521-0@git.sr.ht>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
docs/interop/qcow2.txt | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index f7dc304ff6..e7f036c286 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -214,14 +214,18 @@ version 2.
type.
If the incompatible bit "Compression type" is set: the field
- must be present and non-zero (which means non-zlib
+ must be present and non-zero (which means non-deflate
compression type). Otherwise, this field must not be present
- or must be zero (which means zlib).
+ or must be zero (which means deflate).
Available compression type values:
- 0: zlib <https://www.zlib.net/>
+ 0: deflate <https://www.ietf.org/rfc/rfc1951.txt>
1: zstd <http://github.com/facebook/zstd>
+ The deflate compression type is called "zlib"
+ <https://www.zlib.net/> in QEMU. However, clusters with the
+ deflate compression type do not have zlib headers.
+
=== Header padding ===
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 08/18] block: Call .bdrv_co_create(_opts) unlocked
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (6 preceding siblings ...)
2023-05-17 16:51 ` [PULL 07/18] docs/interop/qcow2.txt: fix description about "zlib" clusters Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 09/18] block/export: Fix null pointer dereference in error path Kevin Wolf
` (9 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
These are functions that modify the graph, so they must be able to take
a writer lock. This is impossible if they already hold the reader lock.
If they need a reader lock for some of their operations, they should
take it internally.
Many of them go through blk_*(), which will always take the lock itself.
Direct calls of bdrv_*() need to take the reader lock. Note that while
locking for bdrv_co_*() calls is checked by TSA, this is not the case
for the mixed_coroutine_fns bdrv_*(). Holding the lock is still required
when they are called from coroutine context like here!
This effectively reverts 4ec8df0183, but adds some internal locking
instead.
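To make the resulting calling convention concrete, here is a rough sketch of a GRAPH_UNLOCKED .bdrv_co_create_opts() implementation after this change; the foo_* names are placeholders, and the lock handling mirrors the crypto and qcow2 hunks below:

    static int coroutine_fn GRAPH_UNLOCKED
    foo_co_create_opts(BlockDriver *drv, const char *filename,
                       QemuOpts *opts, Error **errp)
    {
        BlockDriverState *bs;
        int ret;

        /* Goes through a path that locks internally, no reader lock here */
        ret = bdrv_co_create_file(filename, opts, errp);
        if (ret < 0) {
            return ret;
        }

        bs = bdrv_co_open(filename, NULL, NULL,
                          BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
        if (!bs) {
            return -EIO;
        }

        ret = foo_co_create_on(bs, opts, errp);     /* placeholder helper */

        if (ret < 0) {
            /* Direct bdrv_co_*() call: hold the reader lock just around it */
            bdrv_graph_co_rdlock();
            bdrv_co_delete_file_noerr(bs);
            bdrv_graph_co_rdunlock();
        }

        bdrv_co_unref(bs);
        return ret;
    }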
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-2-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/block-global-state.h | 8 +++----
include/block/block_int-common.h | 4 ++--
block.c | 1 -
block/create.c | 1 -
block/crypto.c | 25 ++++++++++----------
block/parallels.c | 6 ++---
block/qcow.c | 6 ++---
block/qcow2.c | 37 +++++++++++++++++++-----------
block/qed.c | 6 ++---
block/raw-format.c | 2 +-
block/vdi.c | 11 +++++----
block/vhdx.c | 8 ++++---
block/vmdk.c | 27 ++++++++++------------
block/vpc.c | 6 ++---
14 files changed, 78 insertions(+), 70 deletions(-)
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 2d93423d35..f347199bff 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -58,14 +58,14 @@ BlockDriver *bdrv_find_protocol(const char *filename,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
Error **errp);
-int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename,
- QemuOpts *opts, Error **errp);
+int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp);
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index dbec0e3bb4..6492a1e538 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -250,10 +250,10 @@ struct BlockDriver {
BlockDriverState *bs, QDict *options, int flags, Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
- int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)(
+ int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)(
BlockdevCreateOptions *opts, Error **errp);
- int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)(
+ int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)(
BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
int (*bdrv_amend_options)(BlockDriverState *bs,
diff --git a/block.c b/block.c
index f04a6ad4e8..a2f8d5a0c0 100644
--- a/block.c
+++ b/block.c
@@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
int ret;
GLOBAL_STATE_CODE();
ERRP_GUARD();
- assert_bdrv_graph_readable();
if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation",
diff --git a/block/create.c b/block/create.c
index bf67b9947c..6b23a21675 100644
--- a/block/create.c
+++ b/block/create.c
@@ -43,7 +43,6 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
int ret;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD();
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
diff --git a/block/crypto.c b/block/crypto.c
index 30093cff9b..6ee8d46d30 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -99,12 +99,10 @@ struct BlockCryptoCreateData {
};
-static int block_crypto_create_write_func(QCryptoBlock *block,
- size_t offset,
- const uint8_t *buf,
- size_t buflen,
- void *opaque,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
+ const uint8_t *buf, size_t buflen, void *opaque,
+ Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
ssize_t ret;
@@ -117,10 +115,9 @@ static int block_crypto_create_write_func(QCryptoBlock *block,
return 0;
}
-static int block_crypto_create_init_func(QCryptoBlock *block,
- size_t headerlen,
- void *opaque,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen,
+ void *opaque, Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
Error *local_error = NULL;
@@ -314,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
QCryptoBlockCreateOptions *opts,
PreallocMode prealloc, Error **errp)
@@ -627,7 +624,7 @@ static int block_crypto_open_luks(BlockDriverState *bs,
bs, options, flags, errp);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsLUKS *luks_opts;
@@ -665,7 +662,7 @@ fail:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
@@ -727,7 +724,9 @@ fail:
* beforehand, it has been truncated and corrupted in the process.
*/
if (ret) {
+ bdrv_graph_co_rdlock();
bdrv_co_delete_file_noerr(bs);
+ bdrv_graph_co_rdunlock();
}
bdrv_co_unref(bs);
diff --git a/block/parallels.c b/block/parallels.c
index b49c35929e..d8a3f13e24 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -522,8 +522,8 @@ out:
}
-static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
{
BlockdevCreateOptionsParallels *parallels_opts;
BlockDriverState *bs;
@@ -622,7 +622,7 @@ exit:
goto out;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/qcow.c b/block/qcow.c
index a0c701f578..3644bbf5cb 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -800,8 +800,8 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsQcow *qcow_opts;
int header_size, backing_filename_len, l1_size, shift, i;
@@ -921,7 +921,7 @@ exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
qcow_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/qcow2.c b/block/qcow2.c
index 5bde3b8401..73f36447f9 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -118,8 +118,9 @@ static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
}
-static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
- void *opaque, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
+ Error **errp)
{
BlockDriverState *bs = opaque;
BDRVQcow2State *s = bs->opaque;
@@ -144,9 +145,7 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
*/
clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
- ret = bdrv_pwrite_zeroes(bs->file,
- ret,
- clusterlen, 0);
+ ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not zero fill encryption header");
return -1;
@@ -156,9 +155,11 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
}
-static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
- const uint8_t *buf, size_t buflen,
- void *opaque, Error **errp)
+/* The graph lock must be held when called in coroutine context */
+static int coroutine_mixed_fn
+qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
+ const uint8_t *buf, size_t buflen,
+ void *opaque, Error **errp)
{
BlockDriverState *bs = opaque;
BDRVQcow2State *s = bs->opaque;
@@ -3137,9 +3138,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
return qcow2_update_header(bs);
}
-static int qcow2_set_up_encryption(BlockDriverState *bs,
- QCryptoBlockCreateOptions *cryptoopts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_set_up_encryption(BlockDriverState *bs,
+ QCryptoBlockCreateOptions *cryptoopts,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
QCryptoBlock *crypto = NULL;
@@ -3426,7 +3428,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
return refcount_bits;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsQcow2 *qcow2_opts;
@@ -3724,8 +3726,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
goto out;
}
+ bdrv_graph_co_rdlock();
ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
+ bdrv_graph_co_rdunlock();
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
goto out;
@@ -3743,6 +3747,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
/* Create a full header (including things like feature table) */
ret = qcow2_update_header(blk_bs(blk));
+ bdrv_graph_co_rdunlock();
+
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update qcow2 header");
goto out;
@@ -3776,7 +3782,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
/* Want encryption? There you go. */
if (qcow2_opts->encrypt) {
+ bdrv_graph_co_rdlock();
ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
+ bdrv_graph_co_rdunlock();
+
if (ret < 0) {
goto out;
}
@@ -3813,7 +3822,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
Error **errp)
{
@@ -3933,8 +3942,10 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
ret = qcow2_co_create(create_options, errp);
finish:
if (ret < 0) {
+ bdrv_graph_co_rdlock();
bdrv_co_delete_file_noerr(bs);
bdrv_co_delete_file_noerr(data_bs);
+ bdrv_graph_co_rdunlock();
} else {
ret = 0;
}
diff --git a/block/qed.c b/block/qed.c
index be9ff0fb34..9a0350b534 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -630,8 +630,8 @@ static void bdrv_qed_close(BlockDriverState *bs)
qemu_vfree(s->l1_table);
}
-static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsQed *qed_opts;
BlockBackend *blk = NULL;
@@ -751,7 +751,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/raw-format.c b/block/raw-format.c
index 3a3946213f..918fe4fb7e 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -457,7 +457,7 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file->bs);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
raw_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/vdi.c b/block/vdi.c
index 08331d2dd7..6c35309e04 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -734,8 +734,9 @@ nonallocating_write:
return ret;
}
-static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
- size_t block_size, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size,
+ Error **errp)
{
BlockdevCreateOptionsVdi *vdi_opts;
int ret = 0;
@@ -892,13 +893,13 @@ exit:
return ret;
}
-static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vdi_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/vhdx.c b/block/vhdx.c
index b20b1edf11..89913cba87 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1506,7 +1506,7 @@ exit:
* There are 2 headers, and the highest sequence number will represent
* the active header
*/
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
{
@@ -1515,6 +1515,8 @@ vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
int ret = 0;
VHDXHeader *hdr = NULL;
+ GRAPH_RDLOCK_GUARD();
+
hdr = g_new0(VHDXHeader, 1);
hdr->signature = VHDX_HEADER_SIGNATURE;
@@ -1898,7 +1900,7 @@ exit:
* .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
* 1MB
*/
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsVhdx *vhdx_opts;
@@ -2060,7 +2062,7 @@ delete_and_exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/vmdk.c b/block/vmdk.c
index fddbd1c86c..e3e86608ec 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2165,10 +2165,9 @@ vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
return ret;
}
-static int vmdk_init_extent(BlockBackend *blk,
- int64_t filesize, bool flat,
- bool compress, bool zeroed_grain,
- Error **errp)
+static int GRAPH_UNLOCKED
+vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
+ bool zeroed_grain, Error **errp)
{
int ret, i;
VMDK4Header header;
@@ -2277,7 +2276,7 @@ exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
bool compress, bool zeroed_grain, BlockBackend **pbb,
QemuOpts *opts, Error **errp)
@@ -2358,7 +2357,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
* non-split format.
* idx >= 1: get the n-th extent if in a split subformat
*/
-typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */
+typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
(*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
bool compress, bool zeroed_grain, void *opaque,
Error **errp);
@@ -2374,7 +2373,7 @@ static void vmdk_desc_add_extent(GString *desc,
g_free(basename);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_do_create(int64_t size,
BlockdevVmdkSubformat subformat,
BlockdevVmdkAdapterType adapter_type,
@@ -2605,7 +2604,7 @@ typedef struct {
QemuOpts *opts;
} VMDKCreateOptsData;
-static BlockBackend * coroutine_fn GRAPH_RDLOCK
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
bool compress, bool zeroed_grain, void *opaque,
Error **errp)
@@ -2647,7 +2646,7 @@ exit:
return blk;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
@@ -2756,11 +2755,9 @@ exit:
return ret;
}
-static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
- bool flat, bool split,
- bool compress,
- bool zeroed_grain,
- void *opaque, Error **errp)
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
+ bool zeroed_grain, void *opaque, Error **errp)
{
int ret;
BlockDriverState *bs;
@@ -2809,7 +2806,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
return blk;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsVmdk *opts;
diff --git a/block/vpc.c b/block/vpc.c
index 07ddda5b99..7ee7c7b4e0 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -967,8 +967,8 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
return 0;
}
-static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vpc_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsVpc *vpc_opts;
BlockBackend *blk = NULL;
@@ -1087,7 +1087,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vpc_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 09/18] block/export: Fix null pointer dereference in error path
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (7 preceding siblings ...)
2023-05-17 16:51 ` [PULL 08/18] block: Call .bdrv_co_create(_opts) unlocked Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 10/18] qcow2: Unlock the graph in qcow2_do_open() where necessary Kevin Wolf
` (8 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
There are some error paths in blk_exp_add() that jump to 'fail:' before
'exp' is even created. So we can't just unconditionally access exp->blk.
Add a NULL check, and switch from exp->blk to blk, which is available
earlier, just to be extra sure that we really cover all cases where
BlockDevOps could have been set for it (in practice, this only happens
in drv->create() today, so this part of the change isn't strictly
necessary).
Fixes: Coverity CID 1509238
Fixes: de79b52604e43fdeba6cee4f5af600b62169f2d2
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-3-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/export/export.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/block/export/export.c b/block/export/export.c
index 62c7c22d45..a5c8f42f53 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -192,8 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
return exp;
fail:
- blk_set_dev_ops(exp->blk, NULL, NULL);
- blk_unref(blk);
+ if (blk) {
+ blk_set_dev_ops(blk, NULL, NULL);
+ blk_unref(blk);
+ }
aio_context_release(ctx);
if (exp) {
g_free(exp->id);
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 10/18] qcow2: Unlock the graph in qcow2_do_open() where necessary
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (8 preceding siblings ...)
2023-05-17 16:51 ` [PULL 09/18] block/export: Fix null pointer dereference in error path Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 11/18] qemu-img: Take graph lock more selectively Kevin Wolf
` (7 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
qcow2_do_open() calls a few no_co_wrappers that wrap functions taking
the graph lock internally as a writer. Therefore, it can't hold the
reader lock across these calls; doing so causes deadlocks. Drop the lock
temporarily around the calls.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-4-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/block/qcow2.c b/block/qcow2.c
index 73f36447f9..b00b4e7575 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1619,9 +1619,11 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (open_data_file) {
/* Open external data file */
+ bdrv_graph_co_rdunlock();
s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
&child_of_bds, BDRV_CHILD_DATA,
true, errp);
+ bdrv_graph_co_rdlock();
if (*errp) {
ret = -EINVAL;
goto fail;
@@ -1629,10 +1631,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
if (!s->data_file && s->image_data_file) {
+ bdrv_graph_co_rdunlock();
s->data_file = bdrv_co_open_child(s->image_data_file, options,
"data-file", bs,
&child_of_bds,
BDRV_CHILD_DATA, false, errp);
+ bdrv_graph_co_rdlock();
if (!s->data_file) {
ret = -EINVAL;
goto fail;
@@ -1857,7 +1861,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
fail:
g_free(s->image_data_file);
if (open_data_file && has_data_file(bs)) {
+ bdrv_graph_co_rdunlock();
bdrv_unref_child(bs, s->data_file);
+ bdrv_graph_co_rdlock();
s->data_file = NULL;
}
g_free(s->unknown_header_fields);
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 11/18] qemu-img: Take graph lock more selectively
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (9 preceding siblings ...)
2023-05-17 16:51 ` [PULL 10/18] qcow2: Unlock the graph in qcow2_do_open() where necessary Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 12/18] test-bdrv-drain: " Kevin Wolf
` (6 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
If we take a reader lock, we can't call any functions that take a writer
lock internally without causing deadlocks once the reader lock is
actually enforced in the main thread, too. Take the reader lock only
where it is actually needed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-5-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
qemu-img.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 9f9f0a7629..27f48051b0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2938,8 +2938,6 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
}
bs = blk_bs(blk);
- GRAPH_RDLOCK_GUARD_MAINLOOP();
-
/*
* Note that the returned BlockGraphInfo object will not have
* information about this image's backing node, because we have opened
@@ -2947,7 +2945,10 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
* duplicate the backing chain information that we obtain by walking
* the chain manually here.
*/
+ bdrv_graph_rdlock_main_loop();
bdrv_query_block_graph_info(bs, &info, &err);
+ bdrv_graph_rdunlock_main_loop();
+
if (err) {
error_report_err(err);
blk_unref(blk);
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 12/18] test-bdrv-drain: Take graph lock more selectively
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (10 preceding siblings ...)
2023-05-17 16:51 ` [PULL 11/18] qemu-img: Take graph lock more selectively Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 13/18] test-bdrv-drain: Call bdrv_co_unref() in coroutine context Kevin Wolf
` (5 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
If we take a reader lock, we can't call any functions that take a writer
lock internally without causing deadlocks once the reader lock is
actually enforced in the main thread, too. Take the reader lock only
where it is actually needed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-6-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/unit/test-bdrv-drain.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 9a4c5e59d6..ae4299ccfa 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1004,8 +1004,6 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
void *buffer = g_malloc(65536);
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
- GRAPH_RDLOCK_GUARD();
-
/* Pretend some internal write operation from parent to child.
* Important: We have to read from the child, not from the parent!
* Draining works by first propagating it all up the tree to the
@@ -1014,7 +1012,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
* everything will be drained before we go back down the tree, but
* we do not want that. We want to be in the middle of draining
* when this following requests returns. */
+ bdrv_graph_co_rdlock();
bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);
+ bdrv_graph_co_rdunlock();
g_assert_cmpint(bs->refcnt, ==, 1);
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 13/18] test-bdrv-drain: Call bdrv_co_unref() in coroutine context
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (11 preceding siblings ...)
2023-05-17 16:51 ` [PULL 12/18] test-bdrv-drain: " Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 14/18] blockjob: Adhere to rate limit even when reentered early Kevin Wolf
` (4 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
bdrv_unref() is a no_coroutine_fn, so calling it from coroutine context
is invalid. Use bdrv_co_unref() instead.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-7-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/unit/test-bdrv-drain.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index ae4299ccfa..08bb0f9984 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1019,7 +1019,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
g_assert_cmpint(bs->refcnt, ==, 1);
if (!dbdd->detach_instead_of_delete) {
- blk_unref(blk);
+ blk_co_unref(blk);
} else {
BdrvChild *c, *next_c;
QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 14/18] blockjob: Adhere to rate limit even when reentered early
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (12 preceding siblings ...)
2023-05-17 16:51 ` [PULL 13/18] test-bdrv-drain: Call bdrv_co_unref() in coroutine context Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 15/18] graph-lock: Honour read locks even in the main thread Kevin Wolf
` (3 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
When jobs are sleeping, for example to enforce a given rate limit, they
can be reentered early, in particular in order to get paused, to update
the rate limit or to get cancelled.
Before this patch, they behave in this case as if they had fully
completed their rate limiting delay. This means that requests are sped
up beyond their limit, violating the constraints that the user gave us.
Change the block jobs to sleep in a loop until the necessary delay is
completed, while still allowing cancelling them immediately as well
pausing (handled by the pause point in job_sleep_ns()) and updating the
rate limit.
This change is also motivated by iotests cases being prone to fail
because drain operations pause and unpause them so often that block jobs
complete earlier than they are supposed to. In particular, the next
commit would fail iotests 030 without this change.
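For reference, this is the main-loop shape a job ends up with when using the two new helpers, as in the commit and stream hunks below; ExampleJob and example_process_chunk() are placeholders:

    typedef struct ExampleJob {
        BlockJob common;
    } ExampleJob;

    static int coroutine_fn example_job_loop(ExampleJob *s, int64_t len)
    {
        int64_t offset;
        int64_t n = 0;

        for (offset = 0; offset < len; offset += n) {
            bool copy;

            /* Yield even without a rate limit so bdrv_drain_all() returns,
             * and keep sleeping until the full delay has elapsed. */
            block_job_ratelimit_sleep(&s->common);
            if (job_is_cancelled(&s->common.job)) {
                break;
            }

            n = example_process_chunk(s, offset, &copy);  /* placeholder */
            if (n < 0) {
                return n;
            }

            job_progress_update(&s->common.job, n);
            if (copy) {
                block_job_ratelimit_processed_bytes(&s->common, n);
            }
        }

        return 0;
    }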
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-8-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/blockjob_int.h | 14 ++++++++++----
block/commit.c | 7 ++-----
block/mirror.c | 23 ++++++++++-------------
block/stream.c | 7 ++-----
blockjob.c | 22 ++++++++++++++++++++--
5 files changed, 44 insertions(+), 29 deletions(-)
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index f008446285..104824040c 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -126,12 +126,18 @@ void block_job_user_resume(Job *job);
*/
/**
- * block_job_ratelimit_get_delay:
+ * block_job_ratelimit_processed_bytes:
*
- * Calculate and return delay for the next request in ns. See the documentation
- * of ratelimit_calculate_delay() for details.
+ * To be called after some work has been done. Adjusts the delay for the next
+ * request. See the documentation of ratelimit_calculate_delay() for details.
*/
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n);
+
+/**
+ * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the
+ * right speed according to its rate limiting.
+ */
+void block_job_ratelimit_sleep(BlockJob *job);
/**
* block_job_error_action:
diff --git a/block/commit.c b/block/commit.c
index 2b20fd0fd4..aa45beb0f0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -116,7 +116,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
int64_t offset;
- uint64_t delay_ns = 0;
int ret = 0;
int64_t n = 0; /* bytes */
QEMU_AUTO_VFREE void *buf = NULL;
@@ -149,7 +148,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- job_sleep_ns(&s->common.job, delay_ns);
+ block_job_ratelimit_sleep(&s->common);
if (job_is_cancelled(&s->common.job)) {
break;
}
@@ -184,9 +183,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
job_progress_update(&s->common.job, n);
if (copy) {
- delay_ns = block_job_ratelimit_get_delay(&s->common, n);
- } else {
- delay_ns = 0;
+ block_job_ratelimit_processed_bytes(&s->common, n);
}
}
diff --git a/block/mirror.c b/block/mirror.c
index 717442ca4d..b7d92d1378 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -471,12 +471,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
return bytes_handled;
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->mirror_top_bs->backing->bs;
MirrorOp *pseudo_op;
int64_t offset;
- uint64_t delay_ns = 0, ret = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -608,16 +607,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(io_bytes);
offset += io_bytes;
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
- delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
+ block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
}
- ret = delay_ns;
fail:
QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
g_free(pseudo_op);
-
- return ret;
}
static void mirror_free_init(MirrorBlockJob *s)
@@ -1011,7 +1007,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
assert(!s->dbi);
s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
for (;;) {
- uint64_t delay_ns = 0;
int64_t cnt, delta;
bool should_complete;
@@ -1051,7 +1046,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_wait_for_free_in_flight_slot(s);
continue;
} else if (cnt != 0) {
- delay_ns = mirror_iteration(s);
+ mirror_iteration(s);
}
}
@@ -1114,12 +1109,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
}
if (job_is_ready(&s->common.job) && !should_complete) {
- delay_ns = (s->in_flight == 0 &&
- cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
+ if (s->in_flight == 0 && cnt == 0) {
+ trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
+ BLOCK_JOB_SLICE_TIME);
+ job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
+ }
+ } else {
+ block_job_ratelimit_sleep(&s->common);
}
- trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
- delay_ns);
- job_sleep_ns(&s->common.job, delay_ns);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
diff --git a/block/stream.c b/block/stream.c
index 7f9e1ecdbb..e522bbdec5 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -131,7 +131,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
int64_t len;
int64_t offset = 0;
- uint64_t delay_ns = 0;
int error = 0;
int64_t n = 0; /* bytes */
@@ -155,7 +154,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- job_sleep_ns(&s->common.job, delay_ns);
+ block_job_ratelimit_sleep(&s->common);
if (job_is_cancelled(&s->common.job)) {
break;
}
@@ -204,9 +203,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
/* Publish progress */
job_progress_update(&s->common.job, n);
if (copy) {
- delay_ns = block_job_ratelimit_get_delay(&s->common, n);
- } else {
- delay_ns = 0;
+ block_job_ratelimit_processed_bytes(&s->common, n);
}
}
diff --git a/blockjob.c b/blockjob.c
index 659c3cb9de..913da3cbf7 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -319,10 +319,28 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
return block_job_set_speed_locked(job, speed, errp);
}
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
{
IO_CODE();
- return ratelimit_calculate_delay(&job->limit, n);
+ ratelimit_calculate_delay(&job->limit, n);
+}
+
+void block_job_ratelimit_sleep(BlockJob *job)
+{
+ uint64_t delay_ns;
+
+ /*
+ * Sleep at least once. If the job is reentered early, keep waiting until
+ * we've waited for the full time that is necessary to keep the job at the
+ * right speed.
+ *
+ * Make sure to recalculate the delay after each (possibly interrupted)
+ * sleep because the speed can change while the job has yielded.
+ */
+ do {
+ delay_ns = ratelimit_calculate_delay(&job->limit, 0);
+ job_sleep_ns(&job->job, delay_ns);
+ } while (delay_ns && !job_is_cancelled(&job->job));
}
BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 15/18] graph-lock: Honour read locks even in the main thread
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (13 preceding siblings ...)
2023-05-17 16:51 ` [PULL 14/18] blockjob: Adhere to rate limit even when reentered early Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 16/18] iotests/245: Check if 'compress' driver is available Kevin Wolf
` (2 subsequent siblings)
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
There are some conditions under which we don't actually need to do
anything for taking a reader lock: Writing the graph is only possible
from the main context while holding the BQL. So if a reader is running
in the main context under the BQL and knows that it won't be interrupted
until the next writer runs, we don't actually need to do anything.
This is the case if the reader code neither has a nested event loop
(this is forbidden anyway while you hold the lock) nor is a coroutine
(because a writer could run when the coroutine has yielded).
These conditions are exactly what bdrv_graph_rdlock_main_loop() asserts.
They are not fulfilled in bdrv_graph_co_rdlock(), which always runs in a
coroutine.
This deletes the shortcuts in bdrv_graph_co_rdlock() that skip taking
the reader lock in the main thread.
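Put differently, and as a rough sketch only (the guarded calls stand in for whatever actually needs the graph): non-coroutine main-loop code keeps using the main-loop variants, which amount to the assertions described above, while coroutine code now always registers as a reader:

    /* Non-coroutine code in the main loop, under the BQL */
    static void example_query(BlockDriverState *bs, BlockGraphInfo **info,
                              Error **errp)
    {
        GRAPH_RDLOCK_GUARD_MAINLOOP();
        bdrv_query_block_graph_info(bs, info, errp);
    }

    /* Coroutine context: the reader is really counted now, so a writer in
     * the main loop can wait for it to finish or yield */
    static void coroutine_fn example_co_read(BdrvChild *child,
                                             QEMUIOVector *qiov)
    {
        bdrv_graph_co_rdlock();
        bdrv_co_preadv(child, 0, qiov->size, qiov, 0);
        bdrv_graph_co_rdunlock();
    }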
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-9-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/graph-lock.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 377884c3a9..9c42bd9799 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -162,11 +162,6 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- /* Do not lock if in main thread */
- if (qemu_in_main_thread()) {
- return;
- }
-
for (;;) {
qatomic_set(&bdrv_graph->reader_count,
bdrv_graph->reader_count + 1);
@@ -230,11 +225,6 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- /* Do not lock if in main thread */
- if (qemu_in_main_thread()) {
- return;
- }
-
qatomic_store_release(&bdrv_graph->reader_count,
bdrv_graph->reader_count - 1);
/* make sure writer sees reader_count before we check has_writer */
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 16/18] iotests/245: Check if 'compress' driver is available
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (14 preceding siblings ...)
2023-05-17 16:51 ` [PULL 15/18] graph-lock: Honour read locks even in the main thread Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 16:51 ` [PULL 17/18] aio-posix: do not nest poll handlers Kevin Wolf
2023-05-17 16:51 ` [PULL 18/18] tested: add test for nested aio_poll() in " Kevin Wolf
17 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
Skip TestBlockdevReopen.test_insert_compress_filter() if the 'compress'
driver isn't available.
In order to make the test succeed when the case is skipped, we also need
to remove any output from it (which would be missing in the case where
we skip it). This is done by replacing qemu_io_log() with qemu_io(). In
case of failure, qemu_io() raises an exception with the output of the
qemu-io binary in its message, so we don't actually lose anything.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230511143801.255021-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/245 | 7 ++++---
tests/qemu-iotests/245.out | 9 +--------
2 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index edaf29094b..92b28c79be 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
self.reopen(hd0_opts, {'file': 'hd0-file'})
# Insert (and remove) a compress filter
+ @iotests.skip_if_unsupported(['compress'])
def test_insert_compress_filter(self):
# Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file)
opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)}
@@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
# Check the first byte of the first three L2 entries and verify that
# the second one is compressed (0x40) while the others are not (0x80)
- iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
- '-c', 'read -P 0x40 0x40008 1',
- '-c', 'read -P 0x80 0x40010 1', hd_path[0])
+ iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
+ '-c', 'read -P 0x40 0x40008 1',
+ '-c', 'read -P 0x80 0x40010 1', hd_path[0])
# Swap the disk images of two active block devices
def test_swap_files(self):
diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out
index a4e04a3266..0970ced62a 100644
--- a/tests/qemu-iotests/245.out
+++ b/tests/qemu-iotests/245.out
@@ -10,14 +10,7 @@
{"return": {}}
{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
-....read 1/1 bytes at offset 262144
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262152
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262160
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-
-................
+....................
----------------------------------------------------------------------
Ran 26 tests
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PULL 17/18] aio-posix: do not nest poll handlers
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (15 preceding siblings ...)
2023-05-17 16:51 ` [PULL 16/18] iotests/245: Check if 'compress' driver is available Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-18 7:13 ` Michael Tokarev
2023-05-17 16:51 ` [PULL 18/18] tested: add test for nested aio_poll() in " Kevin Wolf
17 siblings, 1 reply; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Stefan Hajnoczi <stefanha@redhat.com>
QEMU's event loop supports nesting, which means that event handler
functions may themselves call aio_poll(). The condition that triggered a
handler must be reset before the nested aio_poll() call, otherwise the
same handler will be called and immediately re-enter aio_poll. This
leads to an infinite loop and stack exhaustion.
Poll handlers are especially prone to this issue, because they typically
reset their condition by finishing the processing of pending work.
Unfortunately it is during the processing of pending work that nested
aio_poll() calls typically occur and the condition has not yet been
reset.
Disable a poll handler during ->io_poll_ready() so that a nested
aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the
disabled poll handler and its associated fd handler do not run during
the nested aio_poll(). Calling aio_set_fd_handler() from inside nested
aio_poll() could cause it to run again. If the fd handler is pending
inside nested aio_poll(), then it will also run again.
In theory fd handlers can be affected by the same issue, but they are
more likely to reset the condition before calling nested aio_poll().
This is a special case and it's somewhat complex, but I don't see a way
around it as long as nested aio_poll() is supported.
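As a hypothetical sketch of the rule stated above (the example_* names are invented; the realistic version of this is the unit test added in the next patch): an ->io_poll_ready() callback that runs a nested event loop has to drain the condition that made polling succeed first:

    static void example_io_poll_ready(EventNotifier *notifier)
    {
        ExampleState *s = container_of(notifier, ExampleState, notifier);

        /* Clear the poll condition first ... */
        while (example_has_pending_work(s)) {
            example_process_one(s);
        }

        /* ... only then is a nested event loop safe */
        aio_poll(s->ctx, true);
    }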
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181
Fixes: c38270692593 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK")
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230502184134.534703-2-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
util/aio-posix.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a8be940f76..34bc2a64d8 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
poll_ready && revents == 0 &&
aio_node_check(ctx, node->is_external) &&
node->io_poll_ready) {
+ /*
+ * Remove temporarily to avoid infinite loops when ->io_poll_ready()
+ * calls aio_poll() before clearing the condition that made the poll
+ * handler become ready.
+ */
+ QLIST_SAFE_REMOVE(node, node_poll);
+
node->io_poll_ready(node->opaque);
+ if (!QLIST_IS_INSERTED(node, node_poll)) {
+ QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+ }
+
/*
* Return early since revents was zero. aio_notify() does not count as
* progress.
--
2.40.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PULL 17/18] aio-posix: do not nest poll handlers
2023-05-17 16:51 ` [PULL 17/18] aio-posix: do not nest poll handlers Kevin Wolf
@ 2023-05-18 7:13 ` Michael Tokarev
2023-05-18 15:05 ` Stefan Hajnoczi
0 siblings, 1 reply; 24+ messages in thread
From: Michael Tokarev @ 2023-05-18 7:13 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: richard.henderson, qemu-devel, Stefan Hajnoczi
17.05.2023 19:51, Kevin Wolf wrote:
> From: Stefan Hajnoczi <stefanha@redhat.com>
>
> QEMU's event loop supports nesting, which means that event handler
> functions may themselves call aio_poll(). The condition that triggered a
> handler must be reset before the nested aio_poll() call, otherwise the
> same handler will be called and immediately re-enter aio_poll. This
> leads to an infinite loop and stack exhaustion.
>
> Poll handlers are especially prone to this issue, because they typically
> reset their condition by finishing the processing of pending work.
> Unfortunately it is during the processing of pending work that nested
> aio_poll() calls typically occur and the condition has not yet been
> reset.
>
> Disable a poll handler during ->io_poll_ready() so that a nested
> aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the
> disabled poll handler and its associated fd handler do not run during
> the nested aio_poll(). Calling aio_set_fd_handler() from inside nested
> aio_poll() could cause it to run again. If the fd handler is pending
> inside nested aio_poll(), then it will also run again.
>
> In theory fd handlers can be affected by the same issue, but they are
> more likely to reset the condition before calling nested aio_poll().
>
> This is a special case and it's somewhat complex, but I don't see a way
> around it as long as nested aio_poll() is supported.
>
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181
> Fixes: c38270692593 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK")
Is it not stable-8.0 material?
Thanks,
/mjt
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PULL 17/18] aio-posix: do not nest poll handlers
2023-05-18 7:13 ` Michael Tokarev
@ 2023-05-18 15:05 ` Stefan Hajnoczi
0 siblings, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2023-05-18 15:05 UTC (permalink / raw)
To: Michael Tokarev; +Cc: Kevin Wolf, qemu-block, richard.henderson, qemu-devel
On Thu, May 18, 2023 at 10:13:23AM +0300, Michael Tokarev wrote:
> 17.05.2023 19:51, Kevin Wolf wrote:
> > From: Stefan Hajnoczi <stefanha@redhat.com>
> >
> > QEMU's event loop supports nesting, which means that event handler
> > functions may themselves call aio_poll(). The condition that triggered a
> > handler must be reset before the nested aio_poll() call, otherwise the
> > same handler will be called and immediately re-enter aio_poll. This
> > leads to an infinite loop and stack exhaustion.
> >
> > Poll handlers are especially prone to this issue, because they typically
> > reset their condition by finishing the processing of pending work.
> > Unfortunately it is during the processing of pending work that nested
> > aio_poll() calls typically occur and the condition has not yet been
> > reset.
> >
> > Disable a poll handler during ->io_poll_ready() so that a nested
> > aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the
> > disabled poll handler and its associated fd handler do not run during
> > the nested aio_poll(). Calling aio_set_fd_handler() from inside nested
> > aio_poll() could cause it to run again. If the fd handler is pending
> > inside nested aio_poll(), then it will also run again.
> >
> > In theory fd handlers can be affected by the same issue, but they are
> > more likely to reset the condition before calling nested aio_poll().
> >
> > This is a special case and it's somewhat complex, but I don't see a way
> > around it as long as nested aio_poll() is supported.
> >
> > Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181
> > Fixes: c38270692593 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK")
>
> Is it not a stable-8.0 material?
Yes.
Stefan
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PULL 18/18] tested: add test for nested aio_poll() in poll handlers
2023-05-17 16:50 [PULL 00/18] Block layer patches Kevin Wolf
` (16 preceding siblings ...)
2023-05-17 16:51 ` [PULL 17/18] aio-posix: do not nest poll handlers Kevin Wolf
@ 2023-05-17 16:51 ` Kevin Wolf
2023-05-17 19:10 ` Richard Henderson
17 siblings, 1 reply; 24+ messages in thread
From: Kevin Wolf @ 2023-05-17 16:51 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, richard.henderson, qemu-devel
From: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230502184134.534703-3-stefanha@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++
tests/unit/meson.build | 1 +
2 files changed, 131 insertions(+)
create mode 100644 tests/unit/test-nested-aio-poll.c
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
new file mode 100644
index 0000000000..9bbe18b839
--- /dev/null
+++ b/tests/unit/test-nested-aio-poll.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Test that poll handlers are not re-entrant in nested aio_poll()
+ *
+ * Copyright Red Hat
+ *
+ * Poll handlers are usually level-triggered. That means they continue firing
+ * until the condition is reset (e.g. a virtqueue becomes empty). If a poll
+ * handler calls nested aio_poll() before the condition is reset, then infinite
+ * recursion occurs.
+ *
+ * aio_poll() is supposed to prevent this by disabling poll handlers in nested
+ * aio_poll() calls. This test case checks that this is indeed what happens.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qapi/error.h"
+
+typedef struct {
+ AioContext *ctx;
+
+ /* This is the EventNotifier that drives the test */
+ EventNotifier poll_notifier;
+
+ /* This EventNotifier is only used to wake aio_poll() */
+ EventNotifier dummy_notifier;
+
+ bool nested;
+} TestData;
+
+static void io_read(EventNotifier *notifier)
+{
+ fprintf(stderr, "%s %p\n", __func__, notifier);
+ event_notifier_test_and_clear(notifier);
+}
+
+static bool io_poll_true(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return true;
+}
+
+static bool io_poll_false(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return false;
+}
+
+static void io_poll_ready(EventNotifier *notifier)
+{
+ TestData *td = container_of(notifier, TestData, poll_notifier);
+
+ fprintf(stderr, "> %s\n", __func__);
+
+ g_assert(!td->nested);
+ td->nested = true;
+
+ /* Wake the following nested aio_poll() call */
+ event_notifier_set(&td->dummy_notifier);
+
+ /* This nested event loop must not call io_poll()/io_poll_ready() */
+ g_assert(aio_poll(td->ctx, true));
+
+ td->nested = false;
+
+ fprintf(stderr, "< %s\n", __func__);
+}
+
+/* dummy_notifier never triggers */
+static void io_poll_never_ready(EventNotifier *notifier)
+{
+ g_assert_not_reached();
+}
+
+static void test(void)
+{
+ TestData td = {
+ .ctx = aio_context_new(&error_abort),
+ };
+
+ qemu_set_current_aio_context(td.ctx);
+
+ /* Enable polling */
+ aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort);
+
+ /*
+ * The GSource is unused but this has the side-effect of changing the fdmon
+ * that AioContext uses.
+ */
+ aio_get_g_source(td.ctx);
+
+ /* Make the event notifier active (set) right away */
+ event_notifier_init(&td.poll_notifier, 1);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
+ io_read, io_poll_true, io_poll_ready);
+
+ /* This event notifier will be used later */
+ event_notifier_init(&td.dummy_notifier, 0);
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ io_read, io_poll_false, io_poll_never_ready);
+
+ /* Consume aio_notify() */
+ g_assert(!aio_poll(td.ctx, false));
+
+ /*
+ * Run the io_read() handler. This has the side-effect of activating
+ * polling in future aio_poll() calls.
+ */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* The second time around the io_poll()/io_poll_ready() handler runs */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* Run io_poll()/io_poll_ready() one more time to show it keeps working */
+ g_assert(aio_poll(td.ctx, true));
+
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ NULL, NULL, NULL);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
+ event_notifier_cleanup(&td.dummy_notifier);
+ event_notifier_cleanup(&td.poll_notifier);
+ aio_context_unref(td.ctx);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/nested-aio-poll", test);
+ return g_test_run();
+}
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 3bc78d8660..a314f82baa 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -67,6 +67,7 @@ if have_block
     'test-coroutine': [testblock],
     'test-aio': [testblock],
     'test-aio-multithread': [testblock],
+    'test-nested-aio-poll': [testblock],
     'test-throttle': [testblock],
     'test-thread-pool': [testblock],
     'test-hbitmap': [testblock],
--
2.40.1
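To restate the premise from the test's header comment in isolation: a level-triggered poll handler keeps firing as long as its condition holds, so the event loop has to mask poll handlers while a nested call runs. The following is a tiny standalone model of that rule, offered as an illustration only; it is not QEMU's aio_poll() implementation, and toy_poll(), poll_ready(), condition and poll_depth are made-up names:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Level-triggered condition: stays set until the handler clears it */
static bool condition = true;

/* Nesting depth of toy_poll(); the poll handler only runs at depth 0 */
static int poll_depth;

static bool toy_poll(bool blocking);

/* Handler that re-enters the event loop before clearing its condition */
static void poll_ready(void)
{
    toy_poll(true);      /* nested call: the poll handler stays masked */
    condition = false;   /* only now is the level-triggered condition reset */
}

static bool toy_poll(bool blocking)
{
    /* Mask the poll handler in nested calls to avoid infinite recursion */
    if (poll_depth == 0 && condition) {
        poll_depth++;
        poll_ready();
        poll_depth--;
        return true;
    }
    return blocking;     /* pretend some other event made progress */
}

int main(void)
{
    assert(toy_poll(true));   /* poll_ready() runs exactly once */
    printf("nested poll did not recurse\n");
    return 0;
}

Without the poll_depth check, the nested toy_poll() call would invoke poll_ready() again (condition is still true at that point) and recurse until the stack overflows, which is the failure mode the unit test above guards against.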
* Re: [PULL 18/18] tested: add test for nested aio_poll() in poll handlers
2023-05-17 16:51 ` [PULL 18/18] tested: add test for nested aio_poll() in " Kevin Wolf
@ 2023-05-17 19:10 ` Richard Henderson
2023-05-19 9:23 ` Kevin Wolf
0 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2023-05-17 19:10 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: qemu-devel
On 5/17/23 09:51, Kevin Wolf wrote:
> From: Stefan Hajnoczi <stefanha@redhat.com>
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> Message-Id: <20230502184134.534703-3-stefanha@redhat.com>
> Tested-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++
> tests/unit/meson.build | 1 +
> 2 files changed, 131 insertions(+)
> create mode 100644 tests/unit/test-nested-aio-poll.c
This new test fails on Windows:
https://gitlab.com/qemu-project/qemu/-/jobs/4304413315#L3375
https://gitlab.com/qemu-project/qemu/-/jobs/4304413313#L3357
r~
* Re: [PULL 18/18] tested: add test for nested aio_poll() in poll handlers
2023-05-17 19:10 ` Richard Henderson
@ 2023-05-19 9:23 ` Kevin Wolf
2023-05-23 15:36 ` Stefan Hajnoczi
0 siblings, 1 reply; 24+ messages in thread
From: Kevin Wolf @ 2023-05-19 9:23 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-block, qemu-devel, stefanha
On 17.05.2023 at 21:10, Richard Henderson wrote:
> On 5/17/23 09:51, Kevin Wolf wrote:
> > From: Stefan Hajnoczi <stefanha@redhat.com>
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Message-Id: <20230502184134.534703-3-stefanha@redhat.com>
> > Tested-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> > tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++
> > tests/unit/meson.build | 1 +
> > 2 files changed, 131 insertions(+)
> > create mode 100644 tests/unit/test-nested-aio-poll.c
>
> This new test fails on Windows:
>
> https://gitlab.com/qemu-project/qemu/-/jobs/4304413315#L3375
> https://gitlab.com/qemu-project/qemu/-/jobs/4304413313#L3357
What the CI output doesn't show is that the problem seems to be that the
test doesn't even make sense on Windows. When I run it manually:
Unexpected error in aio_context_set_poll_params() at ../../home/kwolf/source/qemu/util/aio-win32.c:443:
Z:\tmp\build-win32\tests\unit\test-nested-aio-poll.exe: AioContext polling is not implemented on Windows
Stefan, I'll squash in the following, so you don't have to resubmit the
series.
Kevin
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index a314f82baa..8ed81786ee 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -67,7 +67,6 @@ if have_block
     'test-coroutine': [testblock],
     'test-aio': [testblock],
     'test-aio-multithread': [testblock],
-    'test-nested-aio-poll': [testblock],
     'test-throttle': [testblock],
     'test-thread-pool': [testblock],
     'test-hbitmap': [testblock],
@@ -115,7 +114,10 @@ if have_block
     tests += {'test-crypto-xts': [crypto, io]}
   endif
   if 'CONFIG_POSIX' in config_host
-    tests += {'test-image-locking': [testblock]}
+    tests += {
+      'test-image-locking': [testblock],
+      'test-nested-aio-poll': [testblock],
+    }
   endif
   if config_host_data.get('CONFIG_REPLICATION')
     tests += {'test-replication': [testblock]}
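The meson.build change above keeps the test out of non-POSIX builds entirely. Purely as an illustrative alternative, and not what was merged, a similar outcome could be approximated with a runtime skip inside the test binary itself using GLib's g_test_skip(); a minimal sketch (the real test body is elided):

#include <glib.h>

static void test(void)
{
#ifdef G_OS_WIN32
    /* AioContext polling is not implemented on Windows, so report a skip */
    g_test_skip("AioContext polling is not implemented on Windows");
    return;
#endif
    /* ... the body of test-nested-aio-poll.c's test() would go here ... */
}

int main(int argc, char **argv)
{
    g_test_init(&argc, &argv, NULL);
    g_test_add_func("/nested-aio-poll", test);
    return g_test_run();
}

Since the test is meaningless without AioContext polling, not building it at all on Windows, as in the squash above, is the simpler choice.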
* Re: [PULL 18/18] tested: add test for nested aio_poll() in poll handlers
2023-05-19 9:23 ` Kevin Wolf
@ 2023-05-23 15:36 ` Stefan Hajnoczi
0 siblings, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2023-05-23 15:36 UTC (permalink / raw)
To: Kevin Wolf; +Cc: Richard Henderson, qemu-block, qemu-devel
On Fri, May 19, 2023 at 11:23:30AM +0200, Kevin Wolf wrote:
> On 17.05.2023 at 21:10, Richard Henderson wrote:
> > On 5/17/23 09:51, Kevin Wolf wrote:
> > > From: Stefan Hajnoczi <stefanha@redhat.com>
> > >
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > Message-Id: <20230502184134.534703-3-stefanha@redhat.com>
> > > Tested-by: Kevin Wolf <kwolf@redhat.com>
> > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > ---
> > > tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++
> > > tests/unit/meson.build | 1 +
> > > 2 files changed, 131 insertions(+)
> > > create mode 100644 tests/unit/test-nested-aio-poll.c
> >
> > This new test fails on Windows:
> >
> > https://gitlab.com/qemu-project/qemu/-/jobs/4304413315#L3375
> > https://gitlab.com/qemu-project/qemu/-/jobs/4304413313#L3357
>
> What the CI output doesn't show is that the problem seems to be that the
> test doesn't even make sense on Windows. When I run it manually:
>
> Unexpected error in aio_context_set_poll_params() at ../../home/kwolf/source/qemu/util/aio-win32.c:443:
> Z:\tmp\build-win32\tests\unit\test-nested-aio-poll.exe: AioContext polling is not implemented on Windows
>
> Stefan, I'll squash in the following, so you don't have to resubmit the
> series.
Thank you, Kevin!
Stefan
> diff --git a/tests/unit/meson.build b/tests/unit/meson.build
> index a314f82baa..8ed81786ee 100644
> --- a/tests/unit/meson.build
> +++ b/tests/unit/meson.build
> @@ -67,7 +67,6 @@ if have_block
> 'test-coroutine': [testblock],
> 'test-aio': [testblock],
> 'test-aio-multithread': [testblock],
> - 'test-nested-aio-poll': [testblock],
> 'test-throttle': [testblock],
> 'test-thread-pool': [testblock],
> 'test-hbitmap': [testblock],
> @@ -115,7 +114,10 @@ if have_block
> tests += {'test-crypto-xts': [crypto, io]}
> endif
> if 'CONFIG_POSIX' in config_host
> - tests += {'test-image-locking': [testblock]}
> + tests += {
> + 'test-image-locking': [testblock],
> + 'test-nested-aio-poll': [testblock],
> + }
> endif
> if config_host_data.get('CONFIG_REPLICATION')
> tests += {'test-replication': [testblock]}
>