From mboxrd@z Thu Jan 1 00:00:00 1970 From: Hongyang Yang Subject: Re: [PATCH v10] remus drbd: Implement remus drbd replicated disk Date: Fri, 6 Jun 2014 10:21:40 +0800 Message-ID: <539125B4.2010802@cn.fujitsu.com> References: <1401932069-16460-6-git-send-email-yanghy@cn.fujitsu.com> <1401932386-16538-1-git-send-email-yanghy@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: rshriram@cs.ubc.ca Cc: Lai Jiangshan , Wen Congyang , Ian Jackson , Jiang Yunhong , Dong Eddie , "xen-devel@lists.xen.org" , Andrew Cooper , =?UTF-8?B?Um9nZXIgUGF1IE1vbm7DqQ==?= , Ian Campbell List-Id: xen-devel@lists.xenproject.org On 06/06/2014 12:25 AM, Shriram Rajagopalan wrote: > On Wed, Jun 4, 2014 at 8:39 PM, Yang Hongyang > wrote: > > Implement remus-drbd-replicated-checkpointing-disk based on > generic remus devices framework. > > Signed-off-by: Lai Jiangshan > > Signed-off-by: Wen Congyang > > Signed-off-by: Yang Hongyang > > --- > tools/hotplug/Linux/Makefile | 1 + > tools/hotplug/Linux/block-drbd-probe | 84 ++++++++++ > tools/libxl/Makefile | 2 +- > tools/libxl/libxl_internal.h | 1 + > tools/libxl/libxl_remus_device.c | 23 ++- > tools/libxl/libxl_remus_disk_drbd.c | 290 +++++++++++++++++++++++++++++++++++ > 6 files changed, 394 insertions(+), 7 deletions(-) > create mode 100755 tools/hotplug/Linux/block-drbd-probe > create mode 100644 tools/libxl/libxl_remus_disk_drbd.c > > diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile > index 13e1f5f..5dd8599 100644 > --- a/tools/hotplug/Linux/Makefile > +++ b/tools/hotplug/Linux/Makefile > @@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup > XEN_SCRIPTS += external-device-migrate > XEN_SCRIPTS += vscsi > XEN_SCRIPTS += block-iscsi > +XEN_SCRIPTS += block-drbd-probe > XEN_SCRIPTS += $(XEN_SCRIPTS-y) > > XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh > diff --git a/tools/hotplug/Linux/block-drbd-probe > b/tools/hotplug/Linux/block-drbd-probe > new file mode 100755 > index 0000000..163ad04 > --- /dev/null > +++ b/tools/hotplug/Linux/block-drbd-probe > @@ -0,0 +1,84 @@ > +#! /bin/bash > +# > +# Copyright (C) 2014 FUJITSU LIMITED > +# > +# This library is free software; you can redistribute it and/or > +# modify it under the terms of version 2.1 of the GNU Lesser General Public > +# License as published by the Free Software Foundation. > +# > +# This library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > +# > +# You should have received a copy of the GNU Lesser General Public > +# License along with this library; if not, write to the Free Software > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > +# > +# Usage: > +# block-drbd-probe devicename > +# > +# Return value: > +# 0: the device is drbd device > +# 1: the device is not drbd device > +# 2: unkown error > +# 3: the drbd device does not use protocol D > +# 4: the drbd device is not ready > + > +drbd_res= > + > +function get_res_name() > +{ > + local drbd_dev=$1 > + local drbd_dev_list=($(drbdadm sh-dev all)) > + local drbd_res_list=($(drbdadm sh-resource all)) > + local temp_drbd_dev temp_drbd_res > + local found=0 > + > + for temp_drbd_dev in ${drbd_dev_list[@]}; do > + if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then > + found=1 > + break > + fi > + done > + > + if [[ $found -eq 0 ]]; then > + return 1 > + fi > + > + for temp_drbd_res in ${drbd_res_list[@]}; do > + temp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res) > + if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then > + drbd_res="$temp_drbd_res" > + return 0 > + fi > + done > + > + # OOPS > + return 2 > +} > + > +get_res_name $1 > +if [[ $? -ne 0 ]]; then > + exit $? > +fi > + > +# check protocol > +drbdsetup $1 show | grep -q "protocol D;" > +if [[ $? -ne 0 ]]; then > + exit 3 > +fi > + > +# check connect status > +state=$(drbdadm cstate "$drbd_res") > +if [[ "$state" != "Connected" ]]; then > + exit 4 > +fi > + > +# check role > +role=$(drbdadm role "$drbd_res") > +if [[ "$role" != "Primary/Secondary" ]]; then > + exit 4 > +fi > + > +exit 0 > diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile > index 7a722a8..6f4d9b4 100644 > --- a/tools/libxl/Makefile > +++ b/tools/libxl/Makefile > @@ -56,7 +56,7 @@ else > LIBXL_OBJS-y += libxl_nonetbuffer.o > endif > > -LIBXL_OBJS-y += libxl_remus_device.o > +LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o > > LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o > LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index f221f97..47a4ab9 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -2519,6 +2519,7 @@ struct libxl__remus_device_state { > > libxl_device_nic *nics; > int num_nics; > + libxl_device_disk *disks; > int num_disks; > > /* for counting devices that have been handled */ > diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c > index 5f07266..040441a 100644 > --- a/tools/libxl/libxl_remus_device.c > +++ b/tools/libxl/libxl_remus_device.c > @@ -19,8 +19,10 @@ > #include "libxl_internal.h" > > extern libxl__remus_device_ops remus_device_nic; > +extern libxl__remus_device_ops remus_device_drbd_disk; > static libxl__remus_device_ops *dev_ops[] = { > &remus_device_nic, > + &remus_device_drbd_disk, > }; > > static void device_common_cb(libxl__egc *egc, > @@ -194,6 +196,13 @@ static void device_teardown_cb(libxl__egc *egc, > rds->nics = NULL; > rds->num_nics = 0; > > + /* clean disk */ > + for (i = 0; i < rds->num_disks; i++) > + libxl_device_disk_dispose(&rds->disks[i]); > + free(rds->disks); > + rds->disks = NULL; > + rds->num_disks = 0; > + > /* clean device ops */ > for (i = 0; i < ARRAY_SIZE(dev_ops); i++) { > ops = dev_ops[i]; > @@ -269,15 +278,15 @@ void libxl__remus_device_setup(libxl__egc *egc, > libxl__remus_state *rs) > rds->num_nics = 0; > rds->num_disks = 0; > > - /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ > - > if (rs->netbufscript) { > rds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics); > } > + rds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks); > > - GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); > + if (rds->num_nics == 0 && rds->num_disks == 0) > + goto out; > > - /* TBD: CALL libxl__remus_device_init to init remus devices */ > + GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); > > if (rs->netbufscript && rds->nics) { > for (i = 0; i < rds->num_nics; i++) { > @@ -286,8 +295,10 @@ void libxl__remus_device_setup(libxl__egc *egc, > libxl__remus_state *rs) > } > } > > - if (rds->num_nics == 0 && rds->num_disks == 0) > - goto out; > + for (i = 0; i < rds->num_disks; i++) { > + libxl__remus_device_init(egc, rds, > + LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]); > + } > > return; > > diff --git a/tools/libxl/libxl_remus_disk_drbd.c > b/tools/libxl/libxl_remus_disk_drbd.c > new file mode 100644 > index 0000000..f35a406 > --- /dev/null > +++ b/tools/libxl/libxl_remus_disk_drbd.c > @@ -0,0 +1,290 @@ > +/* > + * Copyright (C) 2014 FUJITSU LIMITED > + * Author Lai Jiangshan > > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU Lesser General Public License as published > + * by the Free Software Foundation; version 2.1 only. with the special > + * exception on linking described in file LICENSE. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU Lesser General Public License for more details. > + */ > + > +#include "libxl_osdeps.h" /* must come before any other headers */ > + > +#include "libxl_internal.h" > + > +/*** drbd implementation ***/ > +const int DRBD_SEND_CHECKPOINT = 20; > +const int DRBD_WAIT_CHECKPOINT_ACK = 30; > + > +typedef struct libxl__remus_drbd_disk { > + libxl__remus_device remus_dev; > + int ctl_fd; > + int ackwait; > + const char *path; > +} libxl__remus_drbd_disk; > + > +typedef struct libxl__remus_drbd_state { > + libxl__ao *ao; > + char *drbd_probe_script; > +} libxl__remus_drbd_state; > + > +static void drbd_async_call(libxl__remus_device *dev, > + void func(libxl__remus_device *), > + libxl__ev_child_callback callback) > +{ > + int pid = -1; > + STATE_AO_GC(dev->rds->ao); > + > + /* Fork and call */ > + pid = libxl__ev_child_fork(gc, &dev->child, callback); > + if (pid == -1) { > + LOG(ERROR, "unable to fork"); > + goto out; > + } > + > + if (!pid) { > + /* child */ > + func(dev); > + /* notreached */ > + abort(); > + } > + > + return; > + > +out: > + dev->callback(dev->rds->egc, dev, ERROR_FAIL); > +} > + > +static void chekpoint_async_call_done(libxl__egc *egc, > + libxl__ev_child *child, > + pid_t pid, int status) > +{ > + libxl__remus_device *dev = CONTAINER_OF(child, *dev, child); > + libxl__remus_drbd_disk *rdd = dev->data; > + STATE_AO_GC(dev->rds->ao); > + > + if (WIFEXITED(status)) { > + rdd->ackwait = WEXITSTATUS(status); > + dev->callback(egc, dev, 0); > + } else { > + dev->callback(egc, dev, ERROR_FAIL); > + } > +} > + > +static void drbd_postsuspend_async(libxl__remus_device *dev) > +{ > + libxl__remus_drbd_disk *rdd = dev->data; > + int ackwait = rdd->ackwait; > + > + if (!ackwait) { > + if (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0) > + ackwait = 1; > + } > + > + _exit(ackwait); > +} > + > +static void drbd_postsuspend(libxl__remus_device *dev) > +{ > + drbd_async_call(dev, drbd_postsuspend_async, chekpoint_async_call_done); > +} > + > +static void drbd_preresume_async(libxl__remus_device *dev) > +{ > + libxl__remus_drbd_disk *rdd = dev->data; > + int ackwait = rdd->ackwait; > + > + if (ackwait) { > + ioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0); > + ackwait = 0; > + } > + > + _exit(ackwait); > +} > + > +static void drbd_preresume(libxl__remus_device *dev) > +{ > + drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done); > +} > + > > > > Please get rid of the async execution just to execute a sys call. Not to mention > a fork & exec per sys call, per checkpoint would just add more overhead than what > can be gleaned through async execution. > > But the setup and teardown can use the async execution drbd_async_call as they > involve > invoking the scripts. > > Apart from that, the rest of the code looks fine structurally. Hi Shriram, again thanks for review, per checkpoint, we only do fork and exit soon, no script execution. > > > shriram -- Thanks, Yang.