qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Vaibhav Bhembre <vaibhav@digitalocean.com>
To: qemu-devel@nongnu.org
Cc: Josh Durgin <jdurgin@redhat.com>, Jeff Cody <jcody@redhat.com>,
	Kevin Wolf <kwolf@redhat.com>, Max Reitz <mreitz@redhat.com>,
	Luiz Capitulino <lcapitulino@redhat.com>,
	Eric Blake <eblake@redhat.com>,
	Markus Armbruster <armbru@redhat.com>,
	Vaibhav Bhembre <vaibhav@digitalocean.com>
Subject: [Qemu-devel] [PATCH v2] rbd: reload ceph config for block device
Date: Thu, 14 Jul 2016 15:32:11 -0400	[thread overview]
Message-ID: <1468524731-2306-1-git-send-email-vaibhav@digitalocean.com> (raw)

This patch adds the ability to reload the ceph configuration for an attached RBD
block device. This is necessary for cases where rebooting a VM and/or
detaching and reattaching an RBD drive is not an easy option.

The reload mechanism relies on the bdrv_reopen_* calls to provide a transactional
guarantee (using 2PC) for pulling in new configuration parameters. In the _prepare
phase we do the grunt work of creating and establishing a new connection and opening
another instance of the same RBD image. If any issues are observed while creating a
connection using the new parameters, we _abort the reload. The original connection to
the cluster is kept available and all ongoing I/O on it should be fine.

Once the _prepare phase completes successfully we enter the _commit phase. In this phase
we simply move the I/O over to the new fd for the corresponding image we have already
created in the _prepare phase and reclaim the old rados I/O context and connection.

It is important to note that because we want to use this feature when a QEMU VM is already
running, we need to switch the logic to have values in ceph.conf override the ones present
in the -drive file=* string, so that new changes take effect for keys that are present
in both places.

Signed-off-by: Vaibhav Bhembre <vaibhav@digitalocean.com>

diff --git a/block/rbd.c b/block/rbd.c
index 0a5840d..100f398 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -930,6 +930,125 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
     return snap_count;
 }
 
+static int qemu_rbd_reopen_prepare(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *new_s;
+    rados_t c;
+    rados_ioctx_t io_ctx;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_VAL_SIZE];
+    char *clientname;
+    int r;
+
+    new_s = reopen_state->opaque = g_new0(BDRVRBDState, 1);
+
+    r = qemu_rbd_parsename(reopen_state->bs->filename,
+                           pool, sizeof pool,
+                           snap_buf, sizeof snap_buf,
+                           new_s->name, sizeof new_s->name,
+                           conf, sizeof conf,
+                           errp);
+    if (r < 0) {
+        return r;
+    }
+
+    if (snap_buf[0] != '\0') {
+        new_s->snap = g_strdup(snap_buf);
+    }
+
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    r = rados_create(&c, clientname);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating cluster from config");
+        return r;
+    }
+    new_s->cluster = c;
+
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, false, errp);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "error setting config");
+            return r;
+        }
+    }
+
+    if (strstr(conf, "conf=") == NULL) {
+        r = rados_conf_read_file(c, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(c, conf, true, errp);
+    }
+
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error parsing config");
+        return r;
+    }
+
+    r = rados_connect(c);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error connecting");
+        return r;
+    }
+
+    r = rados_ioctx_create(c, pool, &io_ctx);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error creating ioctx");
+        return r;
+    }
+    new_s->io_ctx = io_ctx;
+
+    r = rbd_open(io_ctx, new_s->name, &new_s->image, new_s->snap);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error opening rbd");
+        return r;
+    }
+
+    return 0;
+}
+
+static void qemu_rbd_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BDRVRBDState *new_s = reopen_state->opaque;
+
+    if (new_s->io_ctx) {
+        rados_ioctx_destroy(new_s->io_ctx);
+    }
+
+    if (new_s->cluster) {
+        rados_shutdown(new_s->cluster);
+    }
+
+    g_free(new_s->snap);
+    g_free(reopen_state->opaque);
+    reopen_state->opaque = NULL;
+}
+
+static void qemu_rbd_reopen_commit(BDRVReopenState *reopen_state)
+{
+    BDRVRBDState *s, *new_s;
+
+    s = reopen_state->bs->opaque;
+    new_s = reopen_state->opaque;
+
+    rados_aio_flush(s->io_ctx);
+
+    rbd_close(s->image);
+    rados_ioctx_destroy(s->io_ctx);
+    g_free(s->snap);
+    rados_shutdown(s->cluster);
+
+    s->io_ctx = new_s->io_ctx;
+    s->cluster = new_s->cluster;
+    s->image = new_s->image;
+    s->snap = new_s->snap;
+    reopen_state->bs->read_only = (s->snap != NULL);
+
+    g_free(reopen_state->opaque);
+    reopen_state->opaque = NULL;
+}
+
 #ifdef LIBRBD_SUPPORTS_DISCARD
 static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
                                         int64_t sector_num,
@@ -989,6 +1108,9 @@ static BlockDriver bdrv_rbd = {
     .create_opts        = &qemu_rbd_create_opts,
     .bdrv_getlength     = qemu_rbd_getlength,
     .bdrv_truncate      = qemu_rbd_truncate,
+    .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
+    .bdrv_reopen_commit  = qemu_rbd_reopen_commit,
+    .bdrv_reopen_abort   = qemu_rbd_reopen_abort,
     .protocol_name      = "rbd",
 
     .bdrv_aio_readv         = qemu_rbd_aio_readv,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 98b4b1a..3d06dc0 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1759,3 +1759,17 @@ ETEXI
 STEXI
 @end table
 ETEXI
+
+
+    {
+        .name = "reload-rbd-config",
+        .args_type = "node:s",
+        .params = "node",
+        .help = "reload rbd ceph config live",
+        .mhandler.cmd = hmp_reload_rbd_config,
+    },
+
+STEXI
+@item reload-rbd-config @var{node}
+Reload ceph config for RBD image.
+ETEXI
diff --git a/hmp.c b/hmp.c
index 0cf5baa..4cf0036 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2474,3 +2474,16 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
 
     qapi_free_HotpluggableCPUList(saved);
 }
+
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict)
+{
+    const char *node = qdict_get_str(qdict, "node");
+    Error *err = NULL;
+
+    qmp_reload_rbd_config(node, &err);
+    if (err) {
+        monitor_printf(mon, "%s\n", error_get_pretty(err));
+        error_free(err);
+        return;
+    }
+}
diff --git a/hmp.h b/hmp.h
index f5d9749..8d2edf7 100644
--- a/hmp.h
+++ b/hmp.h
@@ -133,5 +133,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
 void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
 void hmp_info_dump(Monitor *mon, const QDict *qdict);
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
+void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi-schema.json b/qapi-schema.json
index d2d6506..83921d6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4317,3 +4317,16 @@
 # Since: 2.7
 ##
 { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
+
+##
+# @reload-rbd-config
+#
+# Reload the ceph config for a given RBD block device attached to the VM.
+#
+# @node: Name of the node.
+#
+# Returns: nothing on success.
+#
+# Since: 2.7
+##
+{'command': 'reload-rbd-config', 'data': { 'node': 'str' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6937e83..906b1fe 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4983,3 +4983,24 @@ Example for pseries machine type started with
      { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core",
        "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
    ]}'
+
+EQMP
+
+     {
+        .name = "reload-rbd-config",
+        .args_type = "node:s",
+        .mhandler.cmd_new = qmp_marshal_reload_rbd_config,
+     },
+
+SQMP
+reload-rbd-config
+-----------------------------------------
+
+Reload the ceph config for an RBD block device.
+
+Arguments: "node" (json-string): name of the RBD block device whose config is reloaded.
+
+Example:
+
+-> { "execute": "reload-rbd-config", "arguments": { "node": "drive-virtio-disk0" } }
+<- { "return": {} }
diff --git a/qmp.c b/qmp.c
index b6d531e..1ff81d6 100644
--- a/qmp.c
+++ b/qmp.c
@@ -707,3 +707,34 @@ ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
 
     return head;
 }
+
+void qmp_reload_rbd_config(const char *node, Error **errp)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    Error *local_err = NULL;
+    int ret;
+
+    blk = blk_by_name(node);
+    if (!blk) {
+        error_setg(errp, QERR_INVALID_PARAMETER, "node");
+        return;
+    }
+
+    bs = blk_bs(blk);
+    if (!bs) {
+        error_setg(errp, "no BDS found");
+        return;
+    }
+
+    ret = bdrv_reopen(bs, bdrv_get_flags(bs), &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed reopening node");
+        return;
+    }
+}
-- 
1.9.1

             reply	other threads:[~2016-07-14 19:32 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-14 19:32 Vaibhav Bhembre [this message]
2016-07-14 20:28 ` [Qemu-devel] [PATCH v2] rbd: reload ceph config for block device Eric Blake
2016-07-14 20:53   ` Vaibhav Bhembre
2016-07-14 21:19     ` Eric Blake
2016-07-14 23:14       ` Vaibhav Bhembre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1468524731-2306-1-git-send-email-vaibhav@digitalocean.com \
    --to=vaibhav@digitalocean.com \
    --cc=armbru@redhat.com \
    --cc=eblake@redhat.com \
    --cc=jcody@redhat.com \
    --cc=jdurgin@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=lcapitulino@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).