* [Qemu-devel] [PATCH] rbd: reload ceph config for block device
@ 2016-06-18 2:28 Vaibhav Bhembre
2016-06-20 16:34 ` Eric Blake
0 siblings, 1 reply; 3+ messages in thread
From: Vaibhav Bhembre @ 2016-06-18 2:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Josh Durgin, Jeff Cody, Vaibhav Bhembre
This patch adds ability to reload ceph configuration for an attached RBD
block device. This is necessary for the cases where rebooting a VM and/or
detaching-reattaching a RBD drive is not an easy option.
The reload mechanism relies on the bdrv_reopen_* calls to provide a transactional
guarantee (using 2PC) for pulling in new configuration parameters. In the _prepare
phase we do the grunt-work of creating and establishing a new connection and opening
another instance of the same RBD image. If any issues are observed while creating a
connection using the new parameters we _abort the reload. The original connection to
the cluster is kept available and all ongoing I/O on it should be fine.
Once the _prepare phase completes successfully we enter the _commit phase. In this phase
we simply move the I/O over to the new fd for the corresponding image we have already
created in the _prepare phase and reclaim the old rados I/O context and connection.
It is important to note that because we want to use this feature when a QEMU VM is already
running, we need to switch the logic to have values in ceph.conf override the ones present
in the -drive file=* string in order for new changes to take place, for same keys present
in both places.
Signed-off-by: Vaibhav Bhembre <vaibhav@digitalocean.com>
---
block/rbd.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
hmp-commands.hx | 14 +++++++
hmp.c | 13 ++++++
hmp.h | 1 +
qapi-schema.json | 13 ++++++
qmp-commands.hx | 21 ++++++++++
qmp.c | 31 ++++++++++++++
7 files changed, 215 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index 5226b6f..605f531 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -932,6 +932,125 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
return snap_count;
}
+/*
+ * Report a configuration failure exactly once.
+ *
+ * If the helper that failed already produced an Error object, forward it;
+ * otherwise synthesize one from the negative errno value @r.  Setting an
+ * Error that is already set is not allowed by the Error API, so the two
+ * cases must never be combined.
+ */
+static void qemu_rbd_reopen_conf_error(Error **errp, Error *local_err,
+                                       int r, const char *msg)
+{
+    if (local_err) {
+        error_propagate(errp, local_err);
+    } else {
+        error_setg_errno(errp, -r, "%s", msg);
+    }
+}
+
+/*
+ * Prepare phase of a reopen: open a second cluster connection, I/O context
+ * and image handle for the image named by bs->filename, using freshly read
+ * configuration.  The existing connection of the BDS is not touched.
+ *
+ * On success the new state is left in reopen_state->opaque for the commit
+ * or abort callback and 0 is returned.
+ *
+ * On failure a negative errno value is returned, @errp is set, everything
+ * acquired so far is released and reopen_state->opaque is reset to NULL.
+ * NOTE(review): cleanup is done here because the block layer is understood
+ * to call the abort callback only for entries whose prepare succeeded --
+ * confirm against bdrv_reopen_multiple().
+ */
+static int qemu_rbd_reopen_prepare(BDRVReopenState *reopen_state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *new_s;
+    Error *local_err = NULL;
+    rados_t c;
+    rados_ioctx_t io_ctx;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_VAL_SIZE];
+    char *clientname;
+    int r;
+
+    new_s = reopen_state->opaque = g_new0(BDRVRBDState, 1);
+
+    /* qemu_rbd_parsename() is handed errp and reports its own failure. */
+    r = qemu_rbd_parsename(reopen_state->bs->filename,
+                           pool, sizeof pool,
+                           snap_buf, sizeof snap_buf,
+                           new_s->name, sizeof new_s->name,
+                           conf, sizeof conf,
+                           errp);
+    if (r < 0) {
+        goto failed_free;
+    }
+
+    if (snap_buf[0] != '\0') {
+        new_s->snap = g_strdup(snap_buf);
+    }
+
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    r = rados_create(&c, clientname);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating cluster from config");
+        goto failed_free;
+    }
+    new_s->cluster = c;
+
+    /* First apply the key/value pairs given in the filename string... */
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, false, &local_err);
+        if (r < 0) {
+            qemu_rbd_reopen_conf_error(errp, local_err, r,
+                                       "error setting config");
+            goto failed_shutdown;
+        }
+    }
+
+    /*
+     * ...then read the config file (default location when no "conf=" key
+     * is present), so that values from the file take precedence.
+     */
+    if (strstr(conf, "conf=") == NULL) {
+        r = rados_conf_read_file(c, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, true, &local_err);
+    }
+
+    if (r < 0) {
+        qemu_rbd_reopen_conf_error(errp, local_err, r,
+                                   "error parsing config");
+        goto failed_shutdown;
+    }
+
+    r = rados_connect(c);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error connecting");
+        goto failed_shutdown;
+    }
+
+    r = rados_ioctx_create(c, pool, &io_ctx);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating ioctx");
+        goto failed_shutdown;
+    }
+    new_s->io_ctx = io_ctx;
+
+    r = rbd_open(io_ctx, new_s->name, &new_s->image, new_s->snap);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error opening rbd");
+        goto failed_ioctx;
+    }
+
+    return 0;
+
+failed_ioctx:
+    rados_ioctx_destroy(io_ctx);
+failed_shutdown:
+    rados_shutdown(c);
+failed_free:
+    g_free(new_s->snap);
+    g_free(new_s);
+    reopen_state->opaque = NULL;
+    return r;
+}
+
+
+/*
+ * Abort phase of a reopen: throw away the state built by
+ * qemu_rbd_reopen_prepare() and leave the BDS on its original connection.
+ *
+ * Handles are released in the same order the commit callback uses for the
+ * old state: image first, then the I/O context, then the cluster.  Each
+ * handle is checked because the state is zero-initialized and may be only
+ * partially populated.
+ */
+static void qemu_rbd_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BDRVRBDState *new_s = reopen_state->opaque;
+
+    /* Nothing was prepared (or it was already cleaned up). */
+    if (!new_s) {
+        return;
+    }
+
+    /* The image opened during prepare must be closed, or it is leaked. */
+    if (new_s->image) {
+        rbd_close(new_s->image);
+    }
+
+    if (new_s->io_ctx) {
+        rados_ioctx_destroy(new_s->io_ctx);
+    }
+
+    if (new_s->cluster) {
+        rados_shutdown(new_s->cluster);
+    }
+
+    g_free(new_s->snap);
+    g_free(new_s);
+    reopen_state->opaque = NULL;
+}
+
+
+/*
+ * Commit phase of a reopen: switch the BDS over to the connection, I/O
+ * context and image handle created by qemu_rbd_reopen_prepare(), and
+ * release the old ones.  Cannot fail.
+ */
+static void qemu_rbd_reopen_commit(BDRVReopenState *reopen_state)
+{
+ BDRVRBDState *s, *new_s;
+
+ s = reopen_state->bs->opaque;
+ new_s = reopen_state->opaque;
+
+ /* Flush the old I/O context before it is torn down. */
+ rados_aio_flush(s->io_ctx);
+
+ /* Release the old state: image, I/O context, snapshot name, cluster. */
+ rbd_close(s->image);
+ rados_ioctx_destroy(s->io_ctx);
+ g_free(s->snap);
+ rados_shutdown(s->cluster);
+
+ /*
+ * Take ownership of the handles built in the prepare phase.  s->name is
+ * not copied; prepare parsed new_s->name from the same bs->filename.
+ */
+ s->io_ctx = new_s->io_ctx;
+ s->cluster = new_s->cluster;
+ s->image = new_s->image;
+ s->snap = new_s->snap;
+ /* The device is marked read-only whenever a snapshot name is set. */
+ reopen_state->bs->read_only = (s->snap != NULL);
+
+ /* Only the container is freed; its members now belong to s. */
+ g_free(reopen_state->opaque);
+ reopen_state->opaque = NULL;
+}
+
+
#ifdef LIBRBD_SUPPORTS_DISCARD
static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
int64_t sector_num,
@@ -991,6 +1110,9 @@ static BlockDriver bdrv_rbd = {
.create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
.bdrv_truncate = qemu_rbd_truncate,
+ .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
+ .bdrv_reopen_commit = qemu_rbd_reopen_commit,
+ .bdrv_reopen_abort = qemu_rbd_reopen_abort,
.protocol_name = "rbd",
.bdrv_aio_readv = qemu_rbd_aio_readv,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 98b4b1a..583c4a9 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1759,3 +1759,17 @@ ETEXI
STEXI
@end table
ETEXI
+
+
+ {
+ .name = "reload-rbd-config",
+ .args_type = "device:s",
+ .params = "device",
+ .help = "reload rbd ceph config live",
+ .mhandler.cmd = hmp_reload_rbd_config,
+ },
+
+STEXI
+@item reload-rbd-config @var{device}
+Reload ceph config for RBD image.
+ETEXI
diff --git a/hmp.c b/hmp.c
index 997a768..597fe74 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2475,3 +2475,16 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
qapi_free_HotpluggableCPUList(saved);
}
+
+/*
+ * HMP front end for reload-rbd-config: forwards the "device" argument to
+ * the QMP implementation and prints any resulting error on the monitor.
+ */
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict)
+{
+    Error *local_err = NULL;
+
+    qmp_reload_rbd_config(qdict_get_str(qdict, "device"), &local_err);
+    if (!local_err) {
+        return;
+    }
+
+    monitor_printf(mon, "%s\n", error_get_pretty(local_err));
+    error_free(local_err);
+}
diff --git a/hmp.h b/hmp.h
index f5d9749..8d2edf7 100644
--- a/hmp.h
+++ b/hmp.h
@@ -133,5 +133,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
void hmp_info_dump(Monitor *mon, const QDict *qdict);
void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict);
#endif
diff --git a/qapi-schema.json b/qapi-schema.json
index 0964eec..2a30cc7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4308,3 +4308,16 @@
# Since: 2.7
##
{ 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
+
+##
+# @reload-rbd-config
+#
+# Reload the ceph config for a given RBD block device attached to the VM.
+#
+# @device: Name of the device.
+#
+# Returns: nothing on success.
+#
+# Since: 2.7
+##
+{'command': 'reload-rbd-config', 'data': { 'device': 'str' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index b444c20..6db6775 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4983,3 +4983,24 @@ Example for pseries machine type started with
{ "props": { "core": 0 }, "type": "POWER8-spapr-cpu-core",
"vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
]}'
+
+EQMP
+
+ {
+ .name = "reload-rbd-config",
+ .args_type = "device:s",
+ .mhandler.cmd_new = qmp_marshal_reload_rbd_config,
+ },
+
+SQMP
+reload-rbd-config
+-----------------------------------------
+
+Reload the ceph config for an RBD block device.
+
+Arguments:
+
+- "device": name of the block device whose ceph config is reloaded (json-string)
+
+Example:
+
+-> { "execute": "reload-rbd-config", "arguments": { "device": "drive-virtio-disk0" } }
+<- { "return": {} }
diff --git a/qmp.c b/qmp.c
index 7df6543..d1205ac 100644
--- a/qmp.c
+++ b/qmp.c
@@ -708,3 +708,34 @@ ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
return head;
}
+
+/*
+ * QMP handler for reload-rbd-config.
+ *
+ * Looks up the block backend named @device and reopens its BDS with its
+ * current flags, which runs the driver's reopen prepare/commit/abort
+ * callbacks.  Failures are reported through @errp.
+ */
+void qmp_reload_rbd_config(const char *device, Error **errp)
+{
+    Error *reopen_err = NULL;
+    BlockBackend *backend = blk_by_name(device);
+    BlockDriverState *node;
+    int rc;
+
+    if (backend == NULL) {
+        error_setg(errp, QERR_INVALID_PARAMETER, "device");
+        return;
+    }
+
+    node = blk_bs(backend);
+    if (node == NULL) {
+        error_setg(errp, "no BDS found");
+        return;
+    }
+
+    rc = bdrv_reopen(node, bdrv_get_flags(node), &reopen_err);
+    if (reopen_err != NULL) {
+        /* Prefer the detailed error object when the reopen supplied one. */
+        error_propagate(errp, reopen_err);
+    } else if (rc != 0) {
+        error_setg_errno(errp, -rc, "failed reopening device");
+    }
+}
--
1.9.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [Qemu-devel] [PATCH] rbd: reload ceph config for block device
2016-06-18 2:28 [Qemu-devel] [PATCH] rbd: reload ceph config for block device Vaibhav Bhembre
@ 2016-06-20 16:34 ` Eric Blake
2016-06-20 17:10 ` Vaibhav Bhembre
0 siblings, 1 reply; 3+ messages in thread
From: Eric Blake @ 2016-06-20 16:34 UTC (permalink / raw)
To: Vaibhav Bhembre, qemu-devel; +Cc: Josh Durgin, Jeff Cody
[-- Attachment #1: Type: text/plain, Size: 1161 bytes --]
On 06/17/2016 08:28 PM, Vaibhav Bhembre wrote:
> This patch adds ability to reload ceph configuration for an attached RBD
> block device. This is necessary for the cases where rebooting a VM and/or
> detaching-reattaching a RBD drive is not an easy option.
>
> The reload mechanism relies on the bdrv_reopen_* calls to provide a transactional
> +++ b/qapi-schema.json
> @@ -4308,3 +4308,16 @@
> # Since: 2.7
> ##
> { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
> +
> +##
> +# @reload-rbd-config
> +#
> +# Reload the ceph config for a given RBD block device attached to the VM.
> +#
> +# @device: Name of the device.
> +#
> +# Returns: nothing on success.
> +#
> +# Since: 2.5
You've missed 2.5 by a long shot. This should be 2.7.
> +##
> +{'command': 'reload-rbd-config', 'data': { 'device': 'str' } }
Would 'node' be a better name than 'device'? Isn't this really reloading
the state of an arbitrary BDS node, regardless of whether (or even if)
there is a BB device using that BDS?
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [Qemu-devel] [PATCH] rbd: reload ceph config for block device
2016-06-20 16:34 ` Eric Blake
@ 2016-06-20 17:10 ` Vaibhav Bhembre
0 siblings, 0 replies; 3+ messages in thread
From: Vaibhav Bhembre @ 2016-06-20 17:10 UTC (permalink / raw)
To: Eric Blake; +Cc: qemu-devel, Josh Durgin, Jeff Cody
On Mon, Jun 20, 2016 at 12:34 PM, Eric Blake <eblake@redhat.com> wrote:
> On 06/17/2016 08:28 PM, Vaibhav Bhembre wrote:
> > This patch adds ability to reload ceph configuration for an attached RBD
> > block device. This is necessary for the cases where rebooting a VM and/or
> > detaching-reattaching a RBD drive is not an easy option.
> >
> > The reload mechanism relies on the bdrv_reopen_* calls to provide a
> transactional
>
> > +++ b/qapi-schema.json
> > @@ -4308,3 +4308,16 @@
> > # Since: 2.7
> > ##
> > { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
> > +
> > +##
> > +# @reload-rbd-config
> > +#
> > +# Reload the ceph config for a given RBD block device attached to the
> VM.
> > +#
> > +# @device: Name of the device.
> > +#
> > +# Returns: nothing on success.
> > +#
> > +# Since: 2.5
>
> You've missed 2.5 by a long shot. This should be 2.7.
>
True. I will update.
>
> > +##
> > +{'command': 'reload-rbd-config', 'data': { 'device': 'str' } }
>
> Would 'node' be a better name than 'device'? Isn't this really reloading
> the state of an arbitrary BDS node, regardless of whether (or even if)
> there is a BB device using that BDS?
>
That is correct. It will update the state of a BDS node irrespective of
the device. I will make this change.
>
> --
> Eric Blake eblake redhat com +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-06-20 17:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-06-18 2:28 [Qemu-devel] [PATCH] rbd: reload ceph config for block device Vaibhav Bhembre
2016-06-20 16:34 ` Eric Blake
2016-06-20 17:10 ` Vaibhav Bhembre
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.