From mboxrd@z Thu Jan 1 00:00:00 1970 From: Hongyang Yang Subject: Re: [PATCH v11 5/6] remus drbd: Implement remus drbd replicated disk Date: Mon, 16 Jun 2014 11:53:30 +0800 Message-ID: <539E6A3A.5090205@cn.fujitsu.com> References: <1402625212-25155-1-git-send-email-yanghy@cn.fujitsu.com> <1402625212-25155-6-git-send-email-yanghy@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: rshriram@cs.ubc.ca Cc: ian.campbell@citrix.com, wency@cn.fujitsu.com, stefano.stabellini@eu.citrix.com, ian.jackson@eu.citrix.com, Jiang Yunhong , eddie.dong@intel.com, xen-devel@lists.xen.org, andrew.cooper3@citrix.com, laijs@cn.fujitsu.com, Roger Pau Monne List-Id: xen-devel@lists.xenproject.org On 06/16/2014 11:33 AM, Shriram Rajagopalan wrote: > > On Jun 13, 2014 7:39 AM, "Yang Hongyang" > wrote: > > > > Implement remus-drbd-replicated-checkpointing-disk based on > > generic remus devices framework. > > > > Signed-off-by: Lai Jiangshan > > > Signed-off-by: Wen Congyang > > > Signed-off-by: Yang Hongyang > > > --- > > tools/hotplug/Linux/Makefile | 1 + > > tools/hotplug/Linux/block-drbd-probe | 84 ++++++++++++ > > tools/libxl/Makefile | 2 +- > > tools/libxl/libxl_internal.h | 1 + > > tools/libxl/libxl_remus_device.c | 23 +++- > > tools/libxl/libxl_remus_disk_drbd.c | 249 +++++++++++++++++++++++++++++++++++ > > 6 files changed, 353 insertions(+), 7 deletions(-) > > create mode 100755 tools/hotplug/Linux/block-drbd-probe > > create mode 100644 tools/libxl/libxl_remus_disk_drbd.c > > > > diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile > > index 13e1f5f..5dd8599 100644 > > --- a/tools/hotplug/Linux/Makefile > > +++ b/tools/hotplug/Linux/Makefile > > @@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup > > XEN_SCRIPTS += external-device-migrate > > XEN_SCRIPTS += vscsi > > XEN_SCRIPTS += block-iscsi > > +XEN_SCRIPTS += block-drbd-probe > > XEN_SCRIPTS += $(XEN_SCRIPTS-y) > > > > XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh > > diff --git a/tools/hotplug/Linux/block-drbd-probe > b/tools/hotplug/Linux/block-drbd-probe > > new file mode 100755 > > index 0000000..163ad04 > > --- /dev/null > > +++ b/tools/hotplug/Linux/block-drbd-probe > > @@ -0,0 +1,84 @@ > > +#! /bin/bash > > +# > > +# Copyright (C) 2014 FUJITSU LIMITED > > +# > > +# This library is free software; you can redistribute it and/or > > +# modify it under the terms of version 2.1 of the GNU Lesser General Public > > +# License as published by the Free Software Foundation. > > +# > > +# This library is distributed in the hope that it will be useful, > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > +# Lesser General Public License for more details. > > +# > > +# You should have received a copy of the GNU Lesser General Public > > +# License along with this library; if not, write to the Free Software > > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > > +# > > +# Usage: > > +# block-drbd-probe devicename > > +# > > +# Return value: > > +# 0: the device is drbd device > > +# 1: the device is not drbd device > > +# 2: unkown error > > +# 3: the drbd device does not use protocol D > > +# 4: the drbd device is not ready > > + > > +drbd_res= > > + > > +function get_res_name() > > +{ > > + local drbd_dev=$1 > > + local drbd_dev_list=($(drbdadm sh-dev all)) > > + local drbd_res_list=($(drbdadm sh-resource all)) > > + local temp_drbd_dev temp_drbd_res > > + local found=0 > > + > > + for temp_drbd_dev in ${drbd_dev_list[@]}; do > > + if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then > > + found=1 > > + break > > + fi > > + done > > + > > + if [[ $found -eq 0 ]]; then > > + return 1 > > + fi > > + > > + for temp_drbd_res in ${drbd_res_list[@]}; do > > + temp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res) > > + if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then > > + drbd_res="$temp_drbd_res" > > + return 0 > > + fi > > + done > > + > > + # OOPS > > + return 2 > > +} > > + > > +get_res_name $1 > > +if [[ $? -ne 0 ]]; then > > + exit $? > > +fi > > + > > +# check protocol > > +drbdsetup $1 show | grep -q "protocol D;" > > +if [[ $? -ne 0 ]]; then > > + exit 3 > > +fi > > + > > +# check connect status > > +state=$(drbdadm cstate "$drbd_res") > > +if [[ "$state" != "Connected" ]]; then > > + exit 4 > > +fi > > + > > +# check role > > +role=$(drbdadm role "$drbd_res") > > +if [[ "$role" != "Primary/Secondary" ]]; then > > + exit 4 > > +fi > > + > > +exit 0 > > diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile > > index 7a722a8..6f4d9b4 100644 > > --- a/tools/libxl/Makefile > > +++ b/tools/libxl/Makefile > > @@ -56,7 +56,7 @@ else > > LIBXL_OBJS-y += libxl_nonetbuffer.o > > endif > > > > -LIBXL_OBJS-y += libxl_remus_device.o > > +LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o > > > > LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o > > LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > > index 47648cd..2424be0 100644 > > --- a/tools/libxl/libxl_internal.h > > +++ b/tools/libxl/libxl_internal.h > > @@ -2580,6 +2580,7 @@ struct libxl__remus_device_state { > > > > libxl_device_nic *nics; > > int num_nics; > > + libxl_device_disk *disks; > > int num_disks; > > > > /* for counting devices that have been handled */ > > diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c > > index c447d7d..9637b1e 100644 > > --- a/tools/libxl/libxl_remus_device.c > > +++ b/tools/libxl/libxl_remus_device.c > > @@ -19,8 +19,10 @@ > > #include "libxl_internal.h" > > > > extern libxl__remus_device_ops remus_device_nic; > > +extern libxl__remus_device_ops remus_device_drbd_disk; > > static libxl__remus_device_ops *dev_ops[] = { > > &remus_device_nic, > > + &remus_device_drbd_disk, > > }; > > > > static void device_common_cb(libxl__egc *egc, > > @@ -197,6 +199,13 @@ static void device_teardown_cb(libxl__egc *egc, > > rds->nics = NULL; > > rds->num_nics = 0; > > > > + /* clean disk */ > > + for (i = 0; i < rds->num_disks; i++) > > + libxl_device_disk_dispose(&rds->disks[i]); > > + free(rds->disks); > > + rds->disks = NULL; > > + rds->num_disks = 0; > > + > > /* clean device ops */ > > for (i = 0; i < ARRAY_SIZE(dev_ops); i++) { > > ops = dev_ops[i]; > > @@ -272,15 +281,15 @@ void libxl__remus_device_setup(libxl__egc *egc, > libxl__remus_state *rs) > > rds->num_nics = 0; > > rds->num_disks = 0; > > > > - /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ > > - > > if (rs->netbufscript) { > > rds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics); > > } > > + rds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks); > > > > - GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); > > + if (rds->num_nics == 0 && rds->num_disks == 0) > > + goto out; > > > > - /* TBD: CALL libxl__remus_device_init to init remus devices */ > > + GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); > > > > if (rs->netbufscript && rds->nics) { > > for (i = 0; i < rds->num_nics; i++) { > > @@ -289,8 +298,10 @@ void libxl__remus_device_setup(libxl__egc *egc, > libxl__remus_state *rs) > > } > > } > > > > - if (rds->num_nics == 0 && rds->num_disks == 0) > > - goto out; > > + for (i = 0; i < rds->num_disks; i++) { > > + libxl__remus_device_init(egc, rds, > > + LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]); > > + } > > > > return; > > > > diff --git a/tools/libxl/libxl_remus_disk_drbd.c > b/tools/libxl/libxl_remus_disk_drbd.c > > new file mode 100644 > > index 0000000..69fdd34 > > --- /dev/null > > +++ b/tools/libxl/libxl_remus_disk_drbd.c > > @@ -0,0 +1,249 @@ > > +/* > > + * Copyright (C) 2014 FUJITSU LIMITED > > + * Author Lai Jiangshan > > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU Lesser General Public License as published > > + * by the Free Software Foundation; version 2.1 only. with the special > > + * exception on linking described in file LICENSE. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU Lesser General Public License for more details. > > + */ > > + > > +#include "libxl_osdeps.h" /* must come before any other headers */ > > + > > +#include "libxl_internal.h" > > + > > +/*** drbd implementation ***/ > > +const int DRBD_SEND_CHECKPOINT = 20; > > +const int DRBD_WAIT_CHECKPOINT_ACK = 30; > > + > > +typedef struct libxl__remus_drbd_disk { > > + libxl__remus_device remus_dev; > > + int ctl_fd; > > + int ackwait; > > + const char *path; > > +} libxl__remus_drbd_disk; > > + > > +typedef struct libxl__remus_drbd_state { > > + libxl__ao *ao; > > + char *drbd_probe_script; > > +} libxl__remus_drbd_state; > > + > > +static void drbd_async_call(libxl__remus_device *dev, > > + void func(libxl__remus_device *), > > + libxl__ev_child_callback callback) > > +{ > > + int pid = -1; > > + STATE_AO_GC(dev->rds->ao); > > + > > + /* Fork and call */ > > + pid = libxl__ev_child_fork(gc, &dev->child, callback); > > + if (pid == -1) { > > + LOG(ERROR, "unable to fork"); > > + goto out; > > + } > > + > > + if (!pid) { > > + /* child */ > > + func(dev); > > + /* notreached */ > > + abort(); > > + } > > + > > + return; > > + > > +out: > > + dev->callback(dev->rds->egc, dev, ERROR_FAIL); > > +} > > + > > +static void chekpoint_async_call_done(libxl__egc *egc, > > + libxl__ev_child *child, > > + pid_t pid, int status) > > +{ > > + libxl__remus_device *dev = CONTAINER_OF(child, *dev, child); > > + libxl__remus_drbd_disk *rdd = dev->data; > > + STATE_AO_GC(dev->rds->ao); > > + > > + if (WIFEXITED(status)) { > > + rdd->ackwait = WEXITSTATUS(status); > > + dev->callback(egc, dev, 0); > > + } else { > > + dev->callback(egc, dev, ERROR_FAIL); > > + } > > +} > > + > > +/* this op will not wait and block, so implement as sync op */ > > +static void drbd_postsuspend(libxl__remus_device *dev) > > +{ > > + libxl__remus_drbd_disk *rdd = dev->data; > > + > > + if (!rdd->ackwait) { > > + if (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0) > > + rdd->ackwait = 1; > > + } > > + > > + dev->callback(dev->rds->egc, dev, 0); > > +} > > + > > +static void drbd_preresume_async(libxl__remus_device *dev) > > +{ > > + libxl__remus_drbd_disk *rdd = dev->data; > > + int ackwait = rdd->ackwait; > > + > > + if (ackwait) { > > + ioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0); > > + ackwait = 0; > > + } > > + > > + _exit(ackwait); > > +} > > + > > +static void drbd_preresume(libxl__remus_device *dev) > > +{ > > + drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done); > > +} > > + > > +static int drbd_init(libxl__remus_device_ops *self, > > + libxl__remus_state *rs) > > +{ > > + libxl__remus_drbd_state *drbd_state; > > + > > + STATE_AO_GC(rs->ao); > > + > > + GCNEW(drbd_state); > > + self->data = drbd_state; > > + drbd_state->ao = ao; > > + drbd_state->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe", > > + libxl__xen_script_dir_path()); > > + > > + > > + return 0; > > +} > > + > > +static void drbd_destroy(libxl__remus_device_ops *self) > > +{ > > + return; > > +} > > + > > +static void match_async_exec_cb(libxl__egc *egc, > > + libxl__async_exec_state *aes, > > + int status) > > +{ > > + libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes); > > + > > + if (status) { > > + dev->callback(egc, dev, ERROR_NOT_MATCH); > > + } else { > > + dev->callback(egc, dev, 0); > > + } > > +} > > + > > +static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev) > > +{ > > + int arraysize, nr = 0; > > + const libxl_device_disk *disk = dev->backend_dev; > > + libxl__remus_drbd_state *drbd_state = dev->ops->data; > > + libxl__async_exec_state *aes = &dev->aes; > > + STATE_AO_GC(drbd_state->ao); > > + > > + /* setup env & args */ > > + arraysize = 1; > > + GCNEW_ARRAY(aes->env, arraysize); > > + aes->env[nr++] = NULL; > > + assert(nr <= arraysize); > > + > > + arraysize = 3; > > + nr = 0; > > + GCNEW_ARRAY(aes->args, arraysize); > > + aes->args[nr++] = drbd_state->drbd_probe_script; > > + aes->args[nr++] = disk->pdev_path; > > + aes->args[nr++] = NULL; > > + assert(nr <= arraysize); > > + > > + aes->ao = drbd_state->ao; > > + aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]); > > + aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000; > > + aes->callback = match_async_exec_cb; > > + aes->stdfds[0] = -1; > > + aes->stdfds[1] = -1; > > + aes->stdfds[2] = -1; > > + > > + if (libxl__async_exec_start(gc, aes)) > > + goto out; > > + > > + return; > > + > > +out: > > + dev->callback(egc, dev, ERROR_FAIL); > > +} > > + > > +static void match_async_call_done(libxl__egc *egc, > > + libxl__ev_child *child, > > + pid_t pid, int status) > > +{ > > + libxl__remus_device *dev = CONTAINER_OF(child, *dev, child); > > + STATE_AO_GC(dev->rds->ao); > > + > > + if (WIFEXITED(status)) { > > + if (-WEXITSTATUS(status) == ERROR_NOT_MATCH) { > > + dev->callback(egc, dev, ERROR_NOT_MATCH); > > + } else { > > + match_async_exec(egc, dev); > > + } > > + } else { > > + dev->callback(egc, dev, ERROR_FAIL); > > + } > > +} > > + > > +static void drbd_match_async(libxl__remus_device *dev) > > +{ > > + if (dev->kind != LIBXL__REMUS_DEVICE_DISK) > > + _exit(-ERROR_NOT_MATCH); > > + > > + _exit(0); > > +} > > + > > +static void drbd_match(libxl__remus_device_ops *self, > > + libxl__remus_device *dev) > > +{ > > + drbd_async_call(dev, drbd_match_async, match_async_call_done); > > +} > > + > > +static void drbd_setup(libxl__remus_device *dev) > > +{ > > + libxl__remus_drbd_disk *drbd_disk; > > + const libxl_device_disk *disk = dev->backend_dev; > > + STATE_AO_GC(dev->rds->ao); > > + > > + GCNEW(drbd_disk); > > + dev->data = drbd_disk; > > + drbd_disk->path = disk->pdev_path; > > + drbd_disk->ackwait = 0; > > + drbd_disk->ctl_fd = open(drbd_disk->path, O_RDONLY); > > + if (drbd_disk->ctl_fd < 0) > > + dev->callback(dev->rds->egc, dev, ERROR_FAIL); > > + else > > + dev->callback(dev->rds->egc, dev, 0); > > +} > > + > > +static void drbd_teardown(libxl__remus_device *dev) > > +{ > > + libxl__remus_drbd_disk *drbd_disk = dev->data; > > + > > + close(drbd_disk->ctl_fd); > > + dev->callback(dev->rds->egc, dev, 0); > > +} > > + > > +libxl__remus_device_ops remus_device_drbd_disk = { > > + .init = drbd_init, > > + .destroy = drbd_destroy, > > + .postsuspend = drbd_postsuspend, > > + .preresume = drbd_preresume, > > + .match = drbd_match, > > + .setup = drbd_setup, > > + .teardown = drbd_teardown, > > +}; > > -- > > 1.9.1 > > > > I am fine with this patch. My only feedback is to have an option in xl remus > command to explicitly disable disk buffering, similar to the netbuffer disable > switch. > Agree, will add the option, thanks for the review! > Acked-by: Shriram Rajagopalan > > -- Thanks, Yang.