qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] rbd: reload ceph config for block device
@ 2016-07-14 19:32 Vaibhav Bhembre
  2016-07-14 20:28 ` Eric Blake
  0 siblings, 1 reply; 5+ messages in thread
From: Vaibhav Bhembre @ 2016-07-14 19:32 UTC (permalink / raw)
  To: qemu-devel
  Cc: Josh Durgin, Jeff Cody, Kevin Wolf, Max Reitz, Luiz Capitulino,
	Eric Blake, Markus Armbruster, Vaibhav Bhembre

This patch adds ability to reload ceph configuration for an attached RBD
block device. This is necessary for the cases where rebooting a VM and/or
detaching-reattaching a RBD drive is not an easy option.

The reload mechanism relies on the bdrv_reopen_* calls to provide a transactional
guarantee (using 2PC) for pulling in new configuration parameters. In the _prepare
phase we do the grunt-work of creating and establishing new connection and open
another instance of the same RBD image. If any issues are observed while creating a
connection using the new parameters we _abort the reload. The original connection to
the cluster is kept available and all ongoing I/O on it should be fine.

Once the _prepare phase completes successfully we enter the _commit phase. In this phase
we simple move the I/O over to the new fd for the corresponding image we have already
created in the _prepare phase and reclaim the old rados I/O context and connection.

It is important to note that because we want to use this feature when a QEMU VM is already
running, we need to switch the logic to have values in ceph.conf override the ones present
in the -drive file=* string in order for new changes to take place, for same keys present
in both places.

Signed-off-by: Vaibhav Bhembre <vaibhav@digitalocean.com>

diff --git a/block/rbd.c b/block/rbd.c
index 0a5840d..100f398 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -930,6 +930,125 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
     return snap_count;
 }
 
+static int qemu_rbd_reopen_prepare(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *new_s;
+    rados_t c;
+    rados_ioctx_t io_ctx;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_VAL_SIZE];
+    char *clientname;
+    int r;
+
+    new_s = reopen_state->opaque = g_new0(BDRVRBDState, 1);
+
+    r = qemu_rbd_parsename(reopen_state->bs->filename,
+                           pool, sizeof pool,
+                           snap_buf, sizeof snap_buf,
+                           new_s->name, sizeof new_s->name,
+                           conf, sizeof conf,
+                           errp);
+    if (r < 0) {
+        return r;
+    }
+
+    if (snap_buf[0] != '\0') {
+        new_s->snap = g_strdup(snap_buf);
+    }
+
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    r = rados_create(&c, clientname);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating cluster from config");
+        return r;
+    }
+    new_s->cluster = c;
+
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, false, errp);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "error setting config");
+            return r;
+        }
+    }
+
+    if (strstr(conf, "conf=") == NULL) {
+        r = rados_conf_read_file(c, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, true, errp);
+    }
+
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error parsing config");
+        return r;
+    }
+
+    r = rados_connect(c);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error connecting");
+        return r;
+    }
+
+    r = rados_ioctx_create(c, pool, &io_ctx);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating ioctx");
+        return r;
+    }
+    new_s->io_ctx = io_ctx;
+
+    r = rbd_open(io_ctx, new_s->name, &new_s->image, new_s->snap);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error opening rbd");
+        return r;
+    }
+
+    return 0;
+}
+
+static void qemu_rbd_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BDRVRBDState *new_s = reopen_state->opaque;
+
+    if (new_s->io_ctx) {
+        rados_ioctx_destroy(new_s->io_ctx);
+    }
+
+    if (new_s->cluster) {
+        rados_shutdown(new_s->cluster);
+    }
+
+    g_free(new_s->snap);
+    g_free(reopen_state->opaque);
+    reopen_state->opaque = NULL;
+}
+
+static void qemu_rbd_reopen_commit(BDRVReopenState *reopen_state)
+{
+    BDRVRBDState *s, *new_s;
+
+    s = reopen_state->bs->opaque;
+    new_s = reopen_state->opaque;
+
+    rados_aio_flush(s->io_ctx);
+
+    rbd_close(s->image);
+    rados_ioctx_destroy(s->io_ctx);
+    g_free(s->snap);
+    rados_shutdown(s->cluster);
+
+    s->io_ctx = new_s->io_ctx;
+    s->cluster = new_s->cluster;
+    s->image = new_s->image;
+    s->snap = new_s->snap;
+    reopen_state->bs->read_only = (s->snap != NULL);
+
+    g_free(reopen_state->opaque);
+    reopen_state->opaque = NULL;
+}
+
 #ifdef LIBRBD_SUPPORTS_DISCARD
 static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
                                         int64_t sector_num,
@@ -989,6 +1108,9 @@ static BlockDriver bdrv_rbd = {
     .create_opts        = &qemu_rbd_create_opts,
     .bdrv_getlength     = qemu_rbd_getlength,
     .bdrv_truncate      = qemu_rbd_truncate,
+    .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
+    .bdrv_reopen_commit  = qemu_rbd_reopen_commit,
+    .bdrv_reopen_abort   = qemu_rbd_reopen_abort,
     .protocol_name      = "rbd",
 
     .bdrv_aio_readv         = qemu_rbd_aio_readv,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 98b4b1a..3d06dc0 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1759,3 +1759,17 @@ ETEXI
 STEXI
 @end table
 ETEXI
+
+
+    {
+        .name = "reload-rbd-config",
+        .args_type = "node:s",
+        .params = "node",
+        .help = "reload rbd ceph config live",
+        .mhandler.cmd = hmp_reload_rbd_config,
+    },
+
+STEXI
+@item reload rbd config
+Reload ceph config for RBD image.
+ETEXI
diff --git a/hmp.c b/hmp.c
index 0cf5baa..4cf0036 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2474,3 +2474,16 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
 
     qapi_free_HotpluggableCPUList(saved);
 }
+
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict)
+{
+    const char *node = qdict_get_str(qdict, "node");
+    Error *err = NULL;
+
+    qmp_reload_rbd_config(node, &err);
+    if (err) {
+        monitor_printf(mon, "%s\n", error_get_pretty(err));
+        error_free(err);
+        return;
+    }
+}
diff --git a/hmp.h b/hmp.h
index f5d9749..8d2edf7 100644
--- a/hmp.h
+++ b/hmp.h
@@ -133,5 +133,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
 void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
 void hmp_info_dump(Monitor *mon, const QDict *qdict);
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi-schema.json b/qapi-schema.json
index d2d6506..83921d6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4317,3 +4317,16 @@
 # Since: 2.7
 ##
 { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
+
+##
+# @reload-rbd-config
+#
+# Reload the ceph config for a given RBD block device attached to the VM.
+#
+# @node: Name of the node.
+#
+# Returns: nothing on success.
+#
+# Since: 2.7
+##
+{'command': 'reload-rbd-config', 'data': { 'node': 'str' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6937e83..906b1fe 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4983,3 +4983,24 @@ Example for pseries machine type started with
      { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core",
        "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
    ]}'
+
+EQMP
+
+     {
+        .name = "reload-rbd-config",
+        .args_type = "node:s",
+        .mhandler.cmd_new = qmp_marshal_reload_rbd_config,
+     },
+
+SQMP
+reload-rbd-config
+-----------------------------------------
+
+Reload the ceph config for an RBD block device.
+
+Arguments: None.
+
+Example:
+
+-> { "execute": "reload-rbd-config", "arguments": { "node": "drive-virtio-disk0" } }
+<- { "return": {} }
diff --git a/qmp.c b/qmp.c
index b6d531e..1ff81d6 100644
--- a/qmp.c
+++ b/qmp.c
@@ -707,3 +707,34 @@ ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
 
     return head;
 }
+
+void qmp_reload_rbd_config(const char *node, Error **errp)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    Error *local_err = NULL;
+    int ret;
+
+    blk = blk_by_name(node);
+    if (!blk) {
+        error_setg(errp, QERR_INVALID_PARAMETER, "node");
+        return;
+    }
+
+    bs = blk_bs(blk);
+    if (!bs) {
+        error_setg(errp, "no BDS found");
+        return;
+    }
+
+    ret = bdrv_reopen(bs, bdrv_get_flags(bs), &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed reopening node");
+        return;
+    }
+}
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-07-14 23:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-14 19:32 [Qemu-devel] [PATCH v2] rbd: reload ceph config for block device Vaibhav Bhembre
2016-07-14 20:28 ` Eric Blake
2016-07-14 20:53   ` Vaibhav Bhembre
2016-07-14 21:19     ` Eric Blake
2016-07-14 23:14       ` Vaibhav Bhembre

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).