All of lore.kernel.org
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: Fam Zheng <famz@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>,
	qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>,
	Markus Armbruster <armbru@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v4 2/3] block: Add blockdev-backup to transaction
Date: Fri, 05 Dec 2014 10:24:16 +0100	[thread overview]
Message-ID: <548179C0.5050305@redhat.com> (raw)
In-Reply-To: <20141205063724.GB15691@ad.nay.redhat.com>

On 2014-12-05 at 07:37, Fam Zheng wrote:
> On Thu, 12/04 14:59, Max Reitz wrote:
>> On 2014-12-04 at 03:29, Fam Zheng wrote:
>>> Also add version info for other transaction types.
>>>
>>> Signed-off-by: Fam Zheng <famz@redhat.com>
>>> ---
>>>   blockdev.c       | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   qapi-schema.json |  7 +++++
>>>   2 files changed, 88 insertions(+)
>>>
>>> diff --git a/blockdev.c b/blockdev.c
>>> index f44441a..a98a4f8 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -1559,6 +1559,81 @@ static void drive_backup_clean(BlkTransactionState *common)
>>>       }
>>>   }
>>> +typedef struct BlockdevBackupState {
>>> +    BlkTransactionState common;
>>> +    BlockDriverState *bs;
>>> +    BlockJob *job;
>>> +    AioContext *aio_context;
>>> +} BlockdevBackupState;
>>> +
>>> +static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
>>> +{
>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
>>> +    BlockdevBackup *backup;
>>> +    BlockDriverState *bs, *target;
>>> +    Error *local_err = NULL;
>>> +
>>> +    assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
>>> +    backup = common->action->blockdev_backup;
>>> +
>>> +    bs = bdrv_find(backup->device);
>>> +    if (!bs) {
>>> +        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device);
>>> +        return;
>>> +    }
>>> +
>>> +    target = bdrv_find(backup->target);
>>> +    if (!target) {
>>> +        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target);
>>> +        return;
>>> +    }
>>> +
>>> +    /* AioContext is released in .clean() */
>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>> +    if (state->aio_context != bdrv_get_aio_context(target)) {
>>> +        state->aio_context = NULL;
>>> +        error_setg(errp, "Backup between two IO threads are not implemented");
>> Either *Backups or s/are/is/.
>>
>>> +        return;
>>> +    }
>>> +    aio_context_acquire(state->aio_context);
>>> +
>>> +    qmp_blockdev_backup(backup->device, backup->target,
>>> +                        backup->sync,
>>> +                        backup->has_speed, backup->speed,
>>> +                        backup->has_on_source_error, backup->on_source_error,
>>> +                        backup->has_on_target_error, backup->on_target_error,
>>> +                        &local_err);
>>> +    if (local_err) {
>>> +        error_propagate(errp, local_err);
>>> +        state->bs = NULL;
>>> +        state->job = NULL;
>> No need for these assignments, state is 0-initialized. I wouldn't point that
>> out if Stefan hadn't just sent a patch which removed such assignments
>> in some other transaction preparation.
>>
>>> +        return;
>>> +    }
>>> +
>>> +    state->bs = bdrv_find(backup->device);
>>> +    state->job = state->bs->job;
>>> +}
>>> +
>>> +static void blockdev_backup_abort(BlkTransactionState *common)
>>> +{
>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
>>> +    BlockDriverState *bs = state->bs;
>>> +
>>> +    /* Only cancel if it's the job we started */
>>> +    if (bs && bs->job && bs->job == state->job) {
>>> +        block_job_cancel_sync(bs->job);
>>> +    }
>>> +}
>>> +
>>> +static void blockdev_backup_clean(BlkTransactionState *common)
>>> +{
>>> +    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
>>> +
>>> +    if (state->aio_context) {
>>> +        aio_context_release(state->aio_context);
>>> +    }
>>> +}
>>> +
>>>   static void abort_prepare(BlkTransactionState *common, Error **errp)
>>>   {
>>>       error_setg(errp, "Transaction aborted using Abort action");
>>> @@ -1582,6 +1657,12 @@ static const BdrvActionOps actions[] = {
>>>           .abort = drive_backup_abort,
>>>           .clean = drive_backup_clean,
>>>       },
>>> +    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>> +        .instance_size = sizeof(BlockdevBackupState),
>>> +        .prepare = blockdev_backup_prepare,
>>> +        .abort = blockdev_backup_abort,
>>> +        .clean = blockdev_backup_clean,
>>> +    },
>>>       [TRANSACTION_ACTION_KIND_ABORT] = {
>>>           .instance_size = sizeof(BlkTransactionState),
>>>           .prepare = abort_prepare,
>>> diff --git a/qapi-schema.json b/qapi-schema.json
>>> index 9ffdcf8..411d287 100644
>>> --- a/qapi-schema.json
>>> +++ b/qapi-schema.json
>>> @@ -1254,11 +1254,18 @@
>>>   #
>>>   # A discriminated record of operations that can be performed with
>>>   # @transaction.
>>> +#
>>> +# Since 1.1
>>> +# drive-backup since 1.6
>>> +# abort since 1.6
>>> +# blockdev-snapshot-internal-sync since 1.7
>>> +# blockdev-backup since 2.3
>> This seems a bit hard to read... Maybe an empty line after the "Since 1.1"
>> would help, but I'm not sure...
>>
>>>   ##
>>>   { 'union': 'TransactionAction',
>>>     'data': {
>>>          'blockdev-snapshot-sync': 'BlockdevSnapshot',
>>>          'drive-backup': 'DriveBackup',
>>> +       'blockdev-backup': 'BlockdevBackup',
>>>          'abort': 'Abort',
>>>          'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
>>>      } }
>> So, about this patch in general: I know drive-backup works nearly the same
>> way. It starts a block job in prepare(), which is aborted in abort(). But it
>> seems a bit like cheating to me. For me, a transaction is something which
>> you can start and if any of the operations cannot be executed (because its
>> preparation failed), all are aborted (that is, not even started). The
>> commit() part will really do the operation, and that will never fail because
>> prepare() has made sure it will not.
>>
>> This isn't the case when starting block jobs in prepare(). If some other
>> operation's prepare() fails, some data may have been copied already, so you
>> can't really roll back the operation. It isn't so bad for drive-backup,
>> because you're normally writing to a new image, so having overwritten that
>> image by a bit isn't so bad. But with blockdev-backup, you're overwriting a
>> block device inside qemu, so overwriting it partially may actually be bad
>> (or even overwriting it completely; if a transaction fails, I'd expect the
>> operations to have done nothing at all).
>>
>> I understand that drive-backup does the same thing already (although I do
>> deem the impact there a bit less), so it may just be that my notion of
>> transactions is wrong.
>>
>> However, in this state, where is the advantage of making this an operation
>> usable in a transaction? I can just start a number of blockdev-backup block
>> jobs manually and cancel them if anything goes wrong. There's no difference,
>> so I don't see the benefit of making it a transaction operation if a
>> transaction does not actually mean "Do not do anything if we don't know for
>> sure that all operations will complete successfully."
> It's still different. When you start a number of drive-backup or
> blockdev-backup in a transaction, all the disks' data are from a single point
> of time. That is important for backup use cases.
>
> When you start jobs manually, the guest may have already changed some data, so the
> disks are not consistent. Think of the Linux guest dm soft raid use case.

Well, I don't know how bad a stop-continue wrapper around starting the 
block jobs would actually be (technically the VM is stopped during the 
transaction preparation as well), but fine.

I still wouldn't call this a transaction but rather a group operation. 
Stefan defined transactions to be "atomic group operations". 
drive-backup is not and blockdev-backup will not be atomic. But we 
already made the mistake with drive-backup so we cannot really fix it 
(if it is a mistake at all, of course, which I'm assuming it is but I 
may be very wrong).

So, assuming it is a mistake (disregard all of the following if 
transactions are not intended to be completely atomic):

To try to mitigate damage I'd vote for introducing GroupOperations in 
contrast to Transactions (transactions are atomic, group operations are 
not). Non-atomic group operations will not have a commit() and probably 
also no abort() or clean(). They will just be started and that's it. We 
could then for example just allow any block job to be started in a group 
operation without having to write specialized code for all of them.

We would then have to call the fact that drive-backup is available for 
transactions just a quirky slip. Not good, but that's all we can do 
about it now.

Alternatively, we can make blockdev-backup a transaction and then 
explicitly state for drive-backup and blockdev-backup that those 
transactions are not really atomic. We'd have to describe exactly what 
happens when they are aborted (which is that a potentially already 
started block job will be aborted, but it is undefined how much of the 
block job has been done). I think it's worse than introducing 
GroupOperation and making the drive-backup transaction a deprecated 
legacy slip, but of course it'd be much easier.

So if we decide to go for GroupOperation, I wouldn't do it in this 
series (which means that this patch would have to wait for later). If we 
don't, this patch can stay and the documentation (what happens on abort) 
may follow later.

Max

  reply	other threads:[~2014-12-05  9:24 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-04  2:29 [Qemu-devel] [PATCH v4 0/3] qmp: Add "blockdev-backup" Fam Zheng
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 1/3] qmp: Add command 'blockdev-backup' Fam Zheng
2014-12-04 13:43   ` Max Reitz
2014-12-05  6:12     ` Fam Zheng
2014-12-05  9:10       ` Max Reitz
2014-12-19  8:47     ` Markus Armbruster
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 2/3] block: Add blockdev-backup to transaction Fam Zheng
2014-12-04 13:59   ` Max Reitz
2014-12-05  6:37     ` Fam Zheng
2014-12-05  9:24       ` Max Reitz [this message]
2014-12-05 14:47         ` Max Reitz
2014-12-04  2:29 ` [Qemu-devel] [PATCH v4 3/3] qemu-iotests: Test blockdev-backup in 055 Fam Zheng
2014-12-04 14:21   ` Max Reitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=548179C0.5050305@redhat.com \
    --to=mreitz@redhat.com \
    --cc=armbru@redhat.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.