From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50865) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WOCao-00008L-S5 for qemu-devel@nongnu.org; Thu, 13 Mar 2014 16:51:11 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1WOCaj-0002Zx-Du for qemu-devel@nongnu.org; Thu, 13 Mar 2014 16:51:06 -0400 Received: from mx1.redhat.com ([209.132.183.28]:18736) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WOCaj-0002ZA-3B for qemu-devel@nongnu.org; Thu, 13 Mar 2014 16:51:01 -0400 Message-ID: <53221A2C.7060607@redhat.com> Date: Thu, 13 Mar 2014 21:50:52 +0100 From: Max Reitz MIME-Version: 1.0 References: <1394574786-32579-1-git-send-email-benoit.canet@irqsave.net> <1394574786-32579-3-git-send-email-benoit.canet@irqsave.net> In-Reply-To: <1394574786-32579-3-git-send-email-benoit.canet@irqsave.net> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH V2 2/3] block: Add drive-mirror-replace command to repair quorum files. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?ISO-8859-1?Q?Beno=EEt_Canet?= , qemu-devel@nongnu.org Cc: kwolf@redhat.com, Benoit Canet , stefanha@redhat.com On 11.03.2014 22:53, Beno=EEt Canet wrote: > When a quorum file is totally destroyed (broken NAS or SAN) the user ca= n start a > drive-mirror job on the quorum block backend and then replace the broke= n > quorum file with drive-mirror-replace given it has a node-name. > > Signed-off-by: Benoit Canet > --- > block.c | 8 ++-- > block/mirror.c | 116 +++++++++++++++++++++++++++++++++++++= +++++++-- > blockdev.c | 27 +++++++++++ > include/block/block.h | 3 ++ > include/block/block_int.h | 15 ++++++ > qapi-schema.json | 33 +++++++++++++ > qmp-commands.hx | 5 ++ > trace-events | 1 + > 8 files changed, 202 insertions(+), 6 deletions(-) > > diff --git a/block.c b/block.c > index 9f2fe85..3fd38b4 100644 > --- a/block.c > +++ b/block.c > @@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, = int flags) > return open_flags; > } > =20 > -static int bdrv_assign_node_name(BlockDriverState *bs, > - const char *node_name, > - Error **errp) > +int bdrv_assign_node_name(BlockDriverState *bs, > + const char *node_name, > + Error **errp) > { > + assert(bs->node_name[0] =3D=3D '\0'); > + > if (!node_name) { > return 0; > } > diff --git a/block/mirror.c b/block/mirror.c > index 6dc84e8..9365669 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -49,6 +49,15 @@ typedef struct MirrorBlockJob { > unsigned long *in_flight_bitmap; > int in_flight; > int ret; > + /* these four fields are used by drive-mirror-replace */ > + /* The code must replace a target with the new mirror */ > + bool must_replace; > + /* The block BDS to replace */ > + BlockDriverState *to_replace; > + /* the node-name of the new mirror BDS */ > + char *new_node_name; > + /* Used to block operations on the drive-mirror-replace target. */ > + Error *replace_blocker; > } MirrorBlockJob; > =20 > typedef struct MirrorOp { > @@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque) > MirrorBlockJob *s =3D opaque; > BlockDriverState *bs =3D s->common.bs; > int64_t sector_num, end, sectors_per_chunk, length; > + BlockDriverState *to_replace; > uint64_t last_pause_ns; > BlockDriverInfo bdi; > char backing_filename[1024]; > @@ -477,11 +487,17 @@ immediate_exit: > g_free(s->in_flight_bitmap); > bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); > bdrv_iostatus_disable(s->target); > + /* Here we handle the drive-mirror-replace QMP command */ > + if (s->must_replace) { > + to_replace =3D s->to_replace; > + } else { > + to_replace =3D s->common.bs; > + } > if (s->should_complete && ret =3D=3D 0) { > - if (bdrv_get_flags(s->target) !=3D bdrv_get_flags(s->common.bs= )) { > - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL)= ; > + if (bdrv_get_flags(s->target) !=3D bdrv_get_flags(to_replace))= { > + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); > } > - bdrv_swap(s->target, s->common.bs); > + bdrv_swap(s->target, to_replace); > if (s->common.driver->job_type =3D=3D BLOCK_JOB_TYPE_COMMIT) = { > /* drop the bs loop chain formed by the swap: break the l= oop then > * trigger the unref from the top one */ > @@ -491,6 +507,11 @@ immediate_exit: > bdrv_unref(p); > } > } > + if (s->must_replace) { > + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); > + error_free(s->replace_blocker); > + bdrv_unref(s->to_replace); > + } > bdrv_unref(s->target); > block_job_completed(&s->common, ret); > } > @@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job) > bdrv_iostatus_reset(s->target); > } > =20 > +bool mirror_set_replace_target(BlockJob *job, const char *reference, > + bool has_new_node_name, > + const char *new_node_name, Error **errp= ) > +{ > + MirrorBlockJob *s =3D container_of(job, MirrorBlockJob, common); > + BlockDriverState *to_replace; > + > + /* We don't want to give too much power to the user as this could = result in > + * BlockDriverState loops if used with something other than sync=3D= full. > + */ > + if (s->is_none_mode || s->base) { > + error_setg(errp, "Can only be used on a mirror done with sync=3D= full"); > + return false; > + } > + > + /* check that the target reference is not an empty string */ > + if (!reference[0]) { > + error_setg(errp, "target-reference is an empty string"); > + return false; > + } > + > + /* Get the block driver state to be replaced */ > + to_replace =3D bdrv_lookup_bs(reference, reference, errp); > + if (!to_replace) { > + return false; > + } > + > + /* If the BDS to be replaced is a regular node we need a new node = name */ > + if (to_replace->node_name[0] && !has_new_node_name) { > + error_setg(errp, "A new-node-name must be provided"); > + return false; > + } > + > + /* Can only replace something else than the source of the mirror *= / > + if (to_replace =3D=3D job->bs) { > + error_setg(errp, "Cannot replace the mirror source"); > + return false; > + } > + > + /* is this bs replace operation blocked */ > + if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) { > + return false; > + } > + > + /* save useful infos for later */ > + s->to_replace =3D to_replace; > + assert(has_new_node_name); Sorry to have missed this in v1: In qapi-schema.json, the description (I=20 guess) tells me that the new_node_name is optional and does not need to=20 be provided if the target device is not a (regular) node of the BDS graph. In case it is a regular node and new_node_name is not given, you're=20 returning an appropriate error message above. But in any other case, not=20 giving new_node_name should be fine. However, you're asserting that it=20 is always given here, and if it isn't, this won't return a more useful=20 error message, but just abort qemu. Are you planning to add support for=20 not passing new_node_name in the future? > + s->new_node_name =3D g_strdup(new_node_name); > + > + return true; > +} > + > +static void mirror_activate_replace_target(BlockJob *job, Error **errp= ) > +{ > + MirrorBlockJob *s =3D container_of(job, MirrorBlockJob, common); > + > + /* Set the new node name if the BDS to replace is a regular node > + * of the graph. > + */ > + if (s->to_replace->node_name[0]) { > + assert(s->new_node_name); > + bdrv_assign_node_name(s->target, s->new_node_name, errp); > + } > + > + g_free(s->new_node_name); > + > + if (error_is_set(errp)) { > + s->to_replace =3D NULL; > + return; > + } > + > + /* block all operations on the target bs */ > + error_setg(&s->replace_blocker, > + "block device is in use by drive-mirror-replace"); > + bdrv_op_block_all(s->to_replace, s->replace_blocker); > + > + bdrv_ref(s->to_replace); > + > + /* activate the replacement operation */ > + s->must_replace =3D true; > +} > + > static void mirror_complete(BlockJob *job, Error **errp) > { > MirrorBlockJob *s =3D container_of(job, MirrorBlockJob, common); > @@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error *= *errp) > return; > } > =20 > + /* drive-mirror-replace is being called on this job so activate th= e > + * replacement target > + */ > + if (s->to_replace) { > + mirror_activate_replace_target(job, errp); > + } > + > s->should_complete =3D true; > block_job_resume(job); > } > diff --git a/blockdev.c b/blockdev.c > index 8a6ae0a..901a05d 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, = Error **errp) > block_job_complete(job, errp); > } > =20 > +void qmp_drive_mirror_replace(const char *device, const char *target_r= eference, > + bool has_new_node_name, > + const char *new_node_name, > + Error **errp) > +{ > + BlockJob *job =3D find_block_job(device); > + > + if (!job) { > + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); > + return; > + } > + > + if (!job->driver || job->driver->job_type !=3D BLOCK_JOB_TYPE_MIRR= OR) { > + error_setg(errp, "Can only be used on a drive-mirror block job= "); > + return; > + } > + > + if (!mirror_set_replace_target(job, target_reference, has_new_node= _name, > + new_node_name, errp)) { > + return; > + } > + > + trace_qmp_drive_mirror_replace(job, target_reference, > + has_new_node_name ? new_node_name := ""); > + block_job_complete(job, errp); > +} > + > void qmp_blockdev_add(BlockdevOptions *options, Error **errp) > { > QmpOutputVisitor *ov =3D qmp_output_visitor_new(); > diff --git a/include/block/block.h b/include/block/block.h > index ee1582d..a9d6ead 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -171,6 +171,7 @@ typedef enum BlockOpType { > BLOCK_OP_TYPE_MIRROR, > BLOCK_OP_TYPE_RESIZE, > BLOCK_OP_TYPE_STREAM, > + BLOCK_OP_TYPE_REPLACE, > BLOCK_OP_TYPE_MAX, > } BlockOpType; > =20 > @@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const c= har *filename, > QDict *options, const char *bdref_key, int flags, > bool allow_none, Error **errp); > void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *back= ing_hd); > +int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name, > + Error **errp); > int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Erro= r **errp); > int bdrv_open(BlockDriverState **pbs, const char *filename, > const char *reference, QDict *options, int flags, > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 1f4f78b..ea281c8 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDrive= rState *target, > void *opaque, Error **errp); > =20 > /* > + * mirror_set_replace_target: > + * @job: An active mirroring block job started with sync=3Dfull. > + * @reference: id or node-name of the BDS to replace when the mirror i= s done. > + * @has_new_node_name: Set to true if new_node_name if provided > + * @new_node_name: The optional new node name of the new mirror. > + * @errp: Error object. > + * > + * Prepare a mirroring operation to replace a BDS pointed to by refere= nce with > + * the new mirror. > + */ > +bool mirror_set_replace_target(BlockJob *job, const char *reference, > + bool has_new_node_name, > + const char *new_node_name, Error **errp= ); > + > +/* > * backup_start: > * @bs: Block device to operate on. > * @target: Block device to write to. > diff --git a/qapi-schema.json b/qapi-schema.json > index f5a8767..ef830e8 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -2716,6 +2716,39 @@ > { 'command': 'block-job-complete', 'data': { 'device': 'str' } } > =20 > ## > +# @drive-mirror-replace: > +# > +# Manually trigger completion of an active background drive-mirror ope= ration > +# and replace the target reference with the new mirror. > +# This switches the device to write to the target path only. > +# The ability to complete is signaled with a BLOCK_JOB_READY event. > +# > +# This command completes an active drive-mirror background operation > +# synchronously and replaces the target reference with the mirror. > +# The ordering of this command's return with the BLOCK_JOB_COMPLETED e= vent > +# is not defined. Note that if an I/O error occurs during the process= ing of > +# this command: 1) the command itself will fail; 2) the error will be = processed > +# according to the rerror/werror arguments that were specified when st= arting > +# the operation. > +# > +# A cancelled or paused drive-mirror job cannot be completed. > +# > +# @device: the device name > +# @target-reference: the id or node name of the block driver state to = replace > +# @new-node-name: #optional set the node-name of the new block driv= er state > +# needed the target reference point to a regular no= de of the > +# graph Again, sorry for not noting this in my review for v1, but I seem to have=20 problems parsing this sentence (the last two lines). Did you omit an=20 "if" and an "-s", so it would read "Needed if the target reference=20 points to a regular node of the graph"? Max > +# > +# Returns: Nothing on success > +# If no background operation is active on this device, Device= NotActive > +# > +# Since: 2.1 > +## > +{ 'command': 'drive-mirror-replace', > + 'data': { 'device': 'str', 'target-reference': 'str', > + '*new-node-name': 'str' } } > + > +## > # @ObjectTypeInfo: > # > # This structure describes a search result from @qom-list-types > diff --git a/qmp-commands.hx b/qmp-commands.hx > index dd336f7..3b5b382 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -1150,6 +1150,11 @@ EQMP > .mhandler.cmd_new =3D qmp_marshal_input_block_job_complete, > }, > { > + .name =3D "drive-mirror-replace", > + .args_type =3D "device:B,target-reference:s,new-node-name:s?"= , > + .mhandler.cmd_new =3D qmp_marshal_input_drive_mirror_replace, > + }, > + { > .name =3D "transaction", > .args_type =3D "actions:q", > .mhandler.cmd_new =3D qmp_marshal_input_transaction, > diff --git a/trace-events b/trace-events > index aec4202..5282904 100644 > --- a/trace-events > +++ b/trace-events > @@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p" > qmp_block_job_pause(void *job) "job %p" > qmp_block_job_resume(void *job) "job %p" > qmp_block_job_complete(void *job) "job %p" > +qmp_drive_mirror_replace(void *job, const char *target_reference, cons= t char *new_node_name) "job %p target_reference %s new_node_name %s" > block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" > qmp_block_stream(void *bs, void *job) "bs %p job %p" > =20