From mboxrd@z Thu Jan  1 00:00:00 1970
From: lhh@sourceware.org
Date: 15 Feb 2007 22:51:04 -0000
Subject: [Cluster-devel] cluster/rgmanager/src/resources Makefile fs.sh ...
Message-ID: <20070215225104.14389.qmail@sourceware.org>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel@redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2007-02-15 22:51:03

Modified files:
	rgmanager/src/resources: Makefile fs.sh
Added files:
	rgmanager/src/resources: lvm.sh

Log message:
	Add LVM failover agent; by Jon Brassow

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.sh.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.15&r2=1.16
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&r1=1.19&r2=1.20

--- cluster/rgmanager/src/resources/lvm.sh	2007/02/15 22:46:00	1.1
+++ cluster/rgmanager/src/resources/lvm.sh	2007/02/15 22:51:03	1.2
@@ -0,0 +1,408 @@
+#!/bin/bash
+
+#
+# Copyright Red Hat Inc., 2007
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+# MA 02139, USA.
+#
+
+#
+# LVM Failover Script.
+#
+# This script correctly handles:
+#  - Relocation
+#  - Fail-over
+#  - Disk failure + Fail-over
+# If you don't know what those mean, ASK!  (jbrassow at redhat.com)
+# NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation.
+#
+# This script should handle (but doesn't right now):
+#  - Operations at the VG level.  Make lv_name optional; this would have
+#    the effect of moving all LVs in a VG, not just one LV.
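+#
+# A resource for this agent is declared in cluster.conf roughly as
+# follows.  This is an illustrative sketch only: the attribute names
+# come from the meta-data below, and "my_vg"/"my_lv" are placeholders
+# for a real volume group and logical volume:
+#
+#	<lvm name="lvm_my_lv" vg_name="my_vg" lv_name="my_lv"/>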
+
+
+LC_ALL=C
+LANG=C
+PATH=/bin:/sbin:/usr/bin:/usr/sbin
+export LC_ALL LANG PATH
+
+. $(dirname $0)/ocf-shellfuncs
+. $(dirname $0)/utils/member_util.sh
+
+rv=0
+
+meta_data()
+{
+	cat <<EOT
+<?xml version="1.0" ?>
+<resource-agent version="rgmanager 2.0" name="lvm">
+    <version>1.0</version>
+
+    <longdesc lang="en">
+	This defines an LVM volume group that is ...
+    </longdesc>
+
+    <shortdesc lang="en">
+	LVM Failover script
+    </shortdesc>
+
+    <parameters>
+	<parameter name="name" primary="1">
+	    <longdesc lang="en">
+		Descriptive name LVM Volume group
+	    </longdesc>
+	    <shortdesc lang="en">
+		Name
+	    </shortdesc>
+	    <content type="string"/>
+	</parameter>
+
+	<parameter name="vg_name" required="1">
+	    <longdesc lang="en">
+		If you can see this, your GUI is broken.
+	    </longdesc>
+	    <shortdesc lang="en">
+		If you can see this, your GUI is broken.
+	    </shortdesc>
+	    <content type="string"/>
+	</parameter>
+
+	<parameter name="lv_name">
+	    <longdesc lang="en">
+		If you can see this, your GUI is broken.
+	    </longdesc>
+	    <shortdesc lang="en">
+		If you can see this, your GUI is broken.
+	    </shortdesc>
+	    <content type="string"/>
+	</parameter>
+
+	<parameter name="nfslock" inherit="service%nfslock">
+	    <longdesc lang="en">
+		If set and unmounting the file system fails, the node will
+		try to kill lockd and issue reclaims across all remaining
+		network interface cards.
+	    </longdesc>
+	    <shortdesc lang="en">
+		Enable NFS lock workarounds
+	    </shortdesc>
+	    <content type="boolean"/>
+	</parameter>
+    </parameters>
+
+    <actions>
+	<action name="start"/>
+	<action name="stop"/>
+	<action name="status"/>
+	<action name="monitor"/>
+	<action name="meta-data"/>
+	<action name="verify-all"/>
+    </actions>
+</resource-agent>
+EOT
+}
+
+# verify_all
+#
+# Verify the parameters passed in
+#
+verify_all()
+{
+	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
+	declare -i ret=0
+
+	# Anything to verify?  Perhaps the names?
+	ocf_log notice "Verifying $lv_path"
+
+	return $ret
+}
+
+# Whole-VG operations are not implemented yet; see the TODO note
+# at the top of this file.
+vg_status()
+{
+	return $OCF_ERR_GENERIC
+}
+
+vg_activate()
+{
+	return $OCF_ERR_GENERIC
+}
+
+# lv_status
+#
+# Is the LV active?
+lv_status()
+{
+	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
+	declare dev="/dev/$lv_path"
+	declare realdev
+	declare owner
+	declare my_name
+
+	#
+	# Check if the device is active; the fifth character of the
+	# lvs attribute field is 'a' when the LV is active.
+	#
+	if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then
+		return $OCF_ERR_GENERIC
+	fi
+
+	#
+	# Check if all links/device nodes are present
+	#
+	if [ -h "$dev" ]; then
+		realdev=$(readlink -f $dev)
+		if [ $? -ne 0 ]; then
+			ocf_log err "Failed to follow link, $dev"
+			return $OCF_ERR_ARGS
+		fi
+
+		if [ ! -b $realdev ]; then
+			ocf_log err "Device node for $lv_path is not present"
+			return $OCF_ERR_GENERIC
+		fi
+	else
+		ocf_log err "Symbolic link for $lv_path is not present"
+		return $OCF_ERR_GENERIC
+	fi
+
+	#
+	# Verify that we are the correct owner
+	#
+	owner=$(lvs -o tags --noheadings $lv_path)
+	my_name=$(local_node_name)
+	if [ -z $my_name ]; then
+		ocf_log err "Unable to determine local machine name"
+
+		# FIXME: I don't really want to fail on 1st offense
+		return $OCF_SUCCESS
+	fi
+
+	if [ -z $owner ] || [ $my_name != $owner ]; then
+		ocf_log err "WARNING: $lv_path should not be active"
+		ocf_log err "WARNING: $my_name does not own $lv_path"
+		ocf_log err "WARNING: Attempting shutdown of $lv_path"
+
+		lvchange -an $lv_path
+		return $OCF_ERR_GENERIC
+	fi
+
+	return $OCF_SUCCESS
+}
+
+# lv_activate_and_tag
+#
+# $1: start/stop
+# $2: ownership tag (the local node name)
+# $3: vg_name/lv_name
+lv_activate_and_tag()
+{
+	declare action=$1
+	declare tag=$2
+	declare lv_path=$3
+
+	if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then
+		ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path"
+		return $OCF_ERR_ARGS
+	fi
+
+	if [ $action == "start" ]; then
+		ocf_log notice "Activating $lv_path"
+		lvchange --addtag $tag $lv_path
+		if [ $? -ne 0 ]; then
+			ocf_log err "Unable to add tag to $lv_path"
+			return $OCF_ERR_GENERIC
+		fi
+
+		lvchange -ay $lv_path
+		if [ $? -ne 0 ]; then
+			ocf_log err "Unable to activate $lv_path"
+			return $OCF_ERR_GENERIC
+		fi
+	else
+		ocf_log notice "Deactivating $lv_path"
+		lvchange -an $lv_path
+		if [ $? -ne 0 ]; then
+			ocf_log err "Unable to deactivate $lv_path"
+			return $OCF_ERR_GENERIC
+		fi
+
+		ocf_log notice "Removing ownership tag ($tag) from $lv_path"
+		lvchange --deltag $tag $lv_path
+		if [ $? -ne 0 ]; then
+			ocf_log err "Unable to delete tag from $lv_path"
+			return $OCF_ERR_GENERIC
+		fi
+	fi
+
+	return $OCF_SUCCESS
+}
+
+# lv_activate
+# $1: start/stop only
+#
+# Basically, if we want to [de]activate an LVM volume,
+# we must own it.  That means that our tag must be on it.
+# This requires a change to /etc/lvm/lvm.conf:
+#	volume_list = [ "root_volume", "@my_hostname" ]
+# where "root_volume" is your root volume group and
+# "my_hostname" is $(local_node_name)
+#
+# If there is a node failure, we may wish to "steal" the
+# LV.  For that, we need to check if the node that owns
+# it is still part of the cluster.  We use the tag to
+# determine who owns the volume, then query for their
+# liveness.  If they are dead, we can steal.
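+#
+# For example (an illustrative sketch only; substitute your own root
+# volume group and host name), a node named "node-01" whose root VG is
+# "VolGroup00" would put, in the activation section of /etc/lvm/lvm.conf:
+#
+#	volume_list = [ "VolGroup00", "@node-01" ]
+#
+# so that LVM refuses to activate any volume that is not tagged with
+# the local node name, aside from the root VG itself.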
+lv_activate()
+{
+	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
+	declare owner=$(lvs -o tags --noheadings $lv_path)
+	declare my_name=$(local_node_name)
+
+	if [ -z $my_name ]; then
+		ocf_log err "Unable to determine cluster node name"
+		return $OCF_ERR_GENERIC
+	fi
+
+	#
+	# FIXME: This code block is repeated below... might be
+	# nice to put it in a function
+	#
+	if [ ! -z $owner ] && [ $owner != $my_name ]; then
+		if is_node_member_clustat $owner ; then
+			ocf_log err "$owner owns $lv_path; unable to $1"
+			return $OCF_ERR_GENERIC
+		fi
+		ocf_log notice "Owner of $lv_path is not in the cluster"
+		ocf_log notice "Stealing $lv_path"
+
+		lvchange --deltag $owner $lv_path
+		if [ $? -ne 0 ]; then
+			ocf_log err "Failed to steal $lv_path from $owner"
+			return $OCF_ERR_GENERIC
+		fi
+
+		# Warning: --deltag can exit 0 without actually removing
+		# the tag, so verify that it is really gone.
+		if [ ! -z $(lvs -o tags --noheadings $lv_path) ]; then
+			ocf_log err "Failed to steal $lv_path from $owner."
+			return $OCF_ERR_GENERIC
+		fi
+	fi
+
+	if ! lv_activate_and_tag $1 $my_name $lv_path; then
+		ocf_log err "Failed to $1 $lv_path"
+
+		if [ "$1" == "start" ]; then
+			ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
+
+			if vgreduce --removemissing --config \
+			    "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
+			    $OCF_RESKEY_vg_name; then
+				ocf_log notice "$OCF_RESKEY_vg_name now consistent"
+
+				owner=$(lvs -o tags --noheadings $lv_path)
+				if [ ! -z $owner ] && [ $owner != $my_name ]; then
+					if is_node_member_clustat $owner ; then
+						ocf_log err "$owner owns $lv_path; unable to $1"
+						return $OCF_ERR_GENERIC
+					fi
+					ocf_log notice "Owner of $lv_path is not in the cluster"
+					ocf_log notice "Stealing $lv_path"
+
+					lvchange --deltag $owner $lv_path
+					if [ $? -ne 0 ]; then
+						ocf_log err "Failed to steal $lv_path from $owner"
+						return $OCF_ERR_GENERIC
+					fi
+
+					# Warning: --deltag can exit 0 without actually
+					# removing the tag, so verify that it is really gone.
+					if [ ! -z $(lvs -o tags --noheadings $lv_path) ]; then
+						ocf_log err "Failed to steal $lv_path from $owner."
+						return $OCF_ERR_GENERIC
+					fi
+				fi
+
+				if ! lv_activate_and_tag $1 $my_name $lv_path; then
+					ocf_log err "Failed second attempt to $1 $lv_path"
+					return $OCF_ERR_GENERIC
+				else
+					ocf_log notice "Second attempt to $1 $lv_path successful"
+					return $OCF_SUCCESS
+				fi
+			else
+				ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
+				return $OCF_ERR_GENERIC
+			fi
+		else
+			ocf_log err "Failed to $1 $lv_path"
+			return $OCF_ERR_GENERIC
+		fi
+	fi
+	return $OCF_SUCCESS
+}
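+
+#
+# Entry point.  rgmanager invokes this script with the action as $1 and
+# the resource parameters exported as OCF_RESKEY_* variables.  For a
+# manual smoke test, something like the following should work (an
+# illustrative sketch; "my_vg"/"my_lv" are placeholders for a real VG
+# and LV, and ocf-shellfuncs must be present next to the script):
+#
+#	OCF_RESKEY_vg_name=my_vg OCF_RESKEY_lv_name=my_lv ./lvm.sh start
+#	OCF_RESKEY_vg_name=my_vg OCF_RESKEY_lv_name=my_lv ./lvm.sh status
+#	OCF_RESKEY_vg_name=my_vg OCF_RESKEY_lv_name=my_lv ./lvm.sh stop
+#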
+case $1 in
+start)
+	if [ -z $OCF_RESKEY_lv_name ]; then
+		vg_activate start || exit 1
+	else
+		lv_activate start || exit 1
+	fi
+	rv=0
+	;;
+
+status|monitor)
+	if [ -z $OCF_RESKEY_lv_name ]; then
+		vg_status || exit 1
+	else
+		lv_status || exit 1
+	fi
+	rv=0
+	;;
+
+stop)
+	if [ -z $OCF_RESKEY_lv_name ]; then
+		vg_activate stop || exit 1
+	else
+		lv_activate stop || exit 1
+	fi
+	rv=0
+	;;
+
+recover|restart)
+	$0 stop || exit $OCF_ERR_GENERIC
+	$0 start || exit $OCF_ERR_GENERIC
+	rv=0
+	;;
+
+meta-data)
+	meta_data
+	rv=0
+	;;
+
+verify-all)
+	verify_all
+	rv=$?
+	;;
+
+*)
+	echo "usage: $0 {start|status|monitor|stop|recover|restart|meta-data|verify-all}"
+	exit $OCF_ERR_GENERIC
+	;;
+esac
+
+exit $rv
--- cluster/rgmanager/src/resources/Makefile	2007/02/14 19:09:45	1.15
+++ cluster/rgmanager/src/resources/Makefile	2007/02/15 22:51:03	1.16
@@ -20,10 +20,10 @@
 RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
 	script.sh netfs.sh clusterfs.sh smb.sh \
 	apache.sh openldap.sh samba.sh mysql.sh \
-	postgres-8.sh tomcat-5.sh
+	postgres-8.sh tomcat-5.sh lvm.sh
 
 METADATA=apache.metadata openldap.metadata samba.metadata \
-	mysql.metadata postgres-8.metadata tomcat-5.metadata
+	mysql.metadata postgres-8.metadata tomcat-5.metadata
 
 TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
--- cluster/rgmanager/src/resources/fs.sh	2007/02/13 15:56:10	1.19
+++ cluster/rgmanager/src/resources/fs.sh	2007/02/15 22:51:03	1.20
@@ -145,7 +145,7 @@
 
-	<parameter name="nfslock">
+	<parameter name="nfslock" inherit="service%nfslock">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
 		try to kill lockd and issue reclaims across all remaining
 		network interface cards.