qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: Fam Zheng <famz@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>,
	qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>,
	Markus Armbruster <armbru@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v4 2/3] block: Add blockdev-backup to transaction
Date: Fri, 05 Dec 2014 15:47:23 +0100	[thread overview]
Message-ID: <5481C57B.60002@redhat.com> (raw)
In-Reply-To: <548179C0.5050305@redhat.com>

On 2014-12-05 at 10:24, Max Reitz wrote:
> On 2014-12-05 at 07:37, Fam Zheng wrote:
>> On Thu, 12/04 14:59, Max Reitz wrote:
>>> On 2014-12-04 at 03:29, Fam Zheng wrote:
>>>> Also add version info for other transaction types.
>>>>
>>>> Signed-off-by: Fam Zheng <famz@redhat.com>
>>>> ---
>>>>   blockdev.c       | 81 
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   qapi-schema.json |  7 +++++
>>>>   2 files changed, 88 insertions(+)
>>>>
>>>> diff --git a/blockdev.c b/blockdev.c
>>>> index f44441a..a98a4f8 100644
>>>> --- a/blockdev.c
>>>> +++ b/blockdev.c
>>>> @@ -1559,6 +1559,81 @@ static void 
>>>> drive_backup_clean(BlkTransactionState *common)
>>>>       }
>>>>   }
>>>> +typedef struct BlockdevBackupState {
>>>> +    BlkTransactionState common;
>>>> +    BlockDriverState *bs;
>>>> +    BlockJob *job;
>>>> +    AioContext *aio_context;
>>>> +} BlockdevBackupState;
>>>> +
>>>> +static void blockdev_backup_prepare(BlkTransactionState *common, 
>>>> Error **errp)
>>>> +{
>>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, 
>>>> common, common);
>>>> +    BlockdevBackup *backup;
>>>> +    BlockDriverState *bs, *target;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    assert(common->action->kind == 
>>>> TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
>>>> +    backup = common->action->blockdev_backup;
>>>> +
>>>> +    bs = bdrv_find(backup->device);
>>>> +    if (!bs) {
>>>> +        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    target = bdrv_find(backup->target);
>>>> +    if (!target) {
>>>> +        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* AioContext is released in .clean() */
>>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>>> +    if (state->aio_context != bdrv_get_aio_context(target)) {
>>>> +        state->aio_context = NULL;
>>>> +        error_setg(errp, "Backup between two IO threads are not 
>>>> implemented");
>>> Either *Backups or s/are/is/.
>>>
>>>> +        return;
>>>> +    }
>>>> +    aio_context_acquire(state->aio_context);
>>>> +
>>>> +    qmp_blockdev_backup(backup->device, backup->target,
>>>> +                        backup->sync,
>>>> +                        backup->has_speed, backup->speed,
>>>> +                        backup->has_on_source_error, 
>>>> backup->on_source_error,
>>>> +                        backup->has_on_target_error, 
>>>> backup->on_target_error,
>>>> +                        &local_err);
>>>> +    if (local_err) {
>>>> +        error_propagate(errp, local_err);
>>>> +        state->bs = NULL;
>>>> +        state->job = NULL;
>>> No need for these assignments, state is 0-initialized. I wouldn't
>>> point that out if Stefan hadn't just sent a patch which removed such
>>> assignments in some other transaction preparation.
>>>
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    state->bs = bdrv_find(backup->device);
>>>> +    state->job = state->bs->job;
>>>> +}
>>>> +
>>>> +static void blockdev_backup_abort(BlkTransactionState *common)
>>>> +{
>>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, 
>>>> common, common);
>>>> +    BlockDriverState *bs = state->bs;
>>>> +
>>>> +    /* Only cancel if it's the job we started */
>>>> +    if (bs && bs->job && bs->job == state->job) {
>>>> +        block_job_cancel_sync(bs->job);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void blockdev_backup_clean(BlkTransactionState *common)
>>>> +{
>>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, 
>>>> common, common);
>>>> +
>>>> +    if (state->aio_context) {
>>>> +        aio_context_release(state->aio_context);
>>>> +    }
>>>> +}
>>>> +
>>>>   static void abort_prepare(BlkTransactionState *common, Error **errp)
>>>>   {
>>>>       error_setg(errp, "Transaction aborted using Abort action");
>>>> @@ -1582,6 +1657,12 @@ static const BdrvActionOps actions[] = {
>>>>           .abort = drive_backup_abort,
>>>>           .clean = drive_backup_clean,
>>>>       },
>>>> +    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>>> +        .instance_size = sizeof(BlockdevBackupState),
>>>> +        .prepare = blockdev_backup_prepare,
>>>> +        .abort = blockdev_backup_abort,
>>>> +        .clean = blockdev_backup_clean,
>>>> +    },
>>>>       [TRANSACTION_ACTION_KIND_ABORT] = {
>>>>           .instance_size = sizeof(BlkTransactionState),
>>>>           .prepare = abort_prepare,
>>>> diff --git a/qapi-schema.json b/qapi-schema.json
>>>> index 9ffdcf8..411d287 100644
>>>> --- a/qapi-schema.json
>>>> +++ b/qapi-schema.json
>>>> @@ -1254,11 +1254,18 @@
>>>>   #
>>>>   # A discriminated record of operations that can be performed with
>>>>   # @transaction.
>>>> +#
>>>> +# Since 1.1
>>>> +# drive-backup since 1.6
>>>> +# abort since 1.6
>>>> +# blockdev-snapshot-internal-sync since 1.7
>>>> +# blockdev-backup since 2.3
>>> This seems a bit hard to read... Maybe an empty line after the 
>>> "Since 1.1"
>>> would help, but I'm not sure...
>>>
>>>>   ##
>>>>   { 'union': 'TransactionAction',
>>>>     'data': {
>>>>          'blockdev-snapshot-sync': 'BlockdevSnapshot',
>>>>          'drive-backup': 'DriveBackup',
>>>> +       'blockdev-backup': 'BlockdevBackup',
>>>>          'abort': 'Abort',
>>>>          'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
>>>>      } }
>>> So, about this patch in general: I know drive-backup works nearly 
>>> the same
>>> way. It starts a block job in prepare(), which is aborted in abort(). 
>>> But it
>>> seems a bit like cheating to me. For me, a transaction is something 
>>> which
>>> you can start and if any of the operations cannot be executed 
>>> (because its
>>> preparation failed), all are aborted (that is, not even started). The
>>> commit() part will really do the operation, and that will never fail 
>>> because
>>> prepare() has made sure it will not.
>>>
>>> This isn't the case when starting block jobs in prepare(). If some 
>>> other
>>> operation's prepare() fails, some data may have been copied already, 
>>> so you
>>> can't really roll back the operation. It isn't so bad for drive-backup,
>>> because you're normally writing to a new image, so having 
>>> overwritten that
>>> image by a bit isn't so bad. But with blockdev-backup, you're 
>>> overwriting a
>>> block device inside qemu, so overwriting it partially may actually 
>>> be bad
>>> (or even overwriting it completely; if a transaction fails, I'd 
>>> expect the
>>> operations to have done nothing at all).
>>>
>>> I understand that drive-backup does the same thing already (although
>>> I deem the impact there to be a bit less severe), so it may just be
>>> that my notion of transactions is wrong.
>>>
>>> However, in this state, where is the advantage of making this an 
>>> operation
>>> usable in a transaction? I can just start a number of 
>>> blockdev-backup block
>>> jobs manually and cancel them if anything goes wrong. There's no 
>>> difference,
>>> so I don't see the benefit of making it a transaction operation if a
>>> transaction does not actually mean "Do not do anything if we don't 
>>> know for
>>> sure that all operations will complete successfully."
>> It's still different. When you start a number of drive-backup or
>> blockdev-backup in a transaction, all the disks' data are from a 
>> single point
>> of time. That is important for backup use cases.
>>
>> When you start jobs manually, the guest may have already changed some
>> data, so the disks are not consistent. Think of the Linux guest dm
>> soft RAID use case.
>
> Well, I don't know how bad a stop-continue wrapper around starting the 
> block jobs would actually be (technically the VM is stopped during the 
> transaction preparation as well), but fine.
>
> I still wouldn't call this a transaction but rather a group operation. 
> Stefan defined transactions to be "atomic group operations". 
> drive-backup is not and blockdev-backup will not be atomic. But we 
> already made the mistake with drive-backup so we can not really fix it 
> (if it is a mistake at all, of course, which I'm assuming it is but I 
> may be very wrong).
>
> So, assuming it is a mistake (disregard all of the following if 
> transactions are not intended to be completely atomic):
>
> To try to mitigate damage I'd vote for introducing GroupOperations in 
> contrast to Transactions (transactions are atomic, group operations 
> are not). Non-atomic group operations will not have a commit() and 
> probably also no abort() or clean(). They will just be started and 
> that's it. We could then for example just allow any block job to be 
> started in a group operation without having to write specialized code 
> for all of them.
>
> We would then have to call the fact that drive-backup is available for 
> transactions just a quirky slip. Not good, but that's all we can do 
> about it now.
>
> Alternatively, we can make blockdev-backup a transaction and then 
> explicitly state for drive-backup and blockdev-backup that those 
> transactions are not really atomic. We'd have to describe exactly what 
> happens when they are aborted (which is that a potentially already 
> started block job will be aborted, but it is undefined how much of the 
> block job has been done). I think it's worse than introducing 
> GroupOperation and making the drive-backup transaction a deprecated 
> legacy slip, but of course it'd be much easier.
>
> So if we decide to go for GroupOperation, I wouldn't do it in this 
> series (which means that this patch would have to wait for later). If 
> we don't, this patch can stay and the documentation (what happens on 
> abort) may follow later.

I just discussed this issue with Kevin, and I was wrong. The faulty 
assumption was that the drive-backup and blockdev-backup operations 
signify the full block job; whereas they actually only start the block 
job. So the atomic operation in question is starting the block job.

The question now is whether the block jobs can write data after they 
have been started and before they would be aborted, because aborting 
them would then be too late. qmp_drive_backup() creates the block job 
and indeed enters it; however, before anything is written, the job will 
always yield (and directly afterwards it checks once again whether it 
has been aborted). Therefore, no data will be written before 
qmp_transaction() returns, and aborting the block jobs will cancel them 
before anything has been written. Great!

The only change which will not be reverted is having created the target 
file. Not nice, but not critical either, and not much we can do about it 
anyway.

Of course, as patch 1 reuses a lot of code from qmp_drive_backup(), 
blockdev-backup will therefore be fine, too.

tl;dr: We don't need GroupOperations; making this a transaction is 
completely fine. Because I only saw some minor issues (I'd really like 
to have the NULL assignments removed, though, because it may be 
confusing to see them in that error path but not in the one right before 
it):

Reviewed-by: Max Reitz <mreitz@redhat.com>

  reply	other threads:[~2014-12-05 14:47 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-04  2:29 [Qemu-devel] [PATCH v4 0/3] qmp: Add "blockdev-backup" Fam Zheng
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 1/3] qmp: Add command 'blockdev-backup' Fam Zheng
2014-12-04 13:43   ` Max Reitz
2014-12-05  6:12     ` Fam Zheng
2014-12-05  9:10       ` Max Reitz
2014-12-19  8:47     ` Markus Armbruster
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 2/3] block: Add blockdev-backup to transaction Fam Zheng
2014-12-04 13:59   ` Max Reitz
2014-12-05  6:37     ` Fam Zheng
2014-12-05  9:24       ` Max Reitz
2014-12-05 14:47         ` Max Reitz [this message]
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 3/3] qemu-iotests: Test blockdev-backup in 055 Fam Zheng
2014-12-04 14:21   ` Max Reitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5481C57B.60002@redhat.com \
    --to=mreitz@redhat.com \
    --cc=armbru@redhat.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).