From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 3 Jan 2008 21:02:54 -0000 Subject: [Cluster-devel] cluster/rgmanager/src/resources Makefile lvm.s ... Message-ID: <20080103210254.643.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: jbrassow at sourceware.org 2008-01-03 21:02:53 Modified files: rgmanager/src/resources: Makefile lvm.sh Added files: rgmanager/src/resources: lvm.metadata lvm_by_lv.sh lvm_by_vg.sh Log message: lvm resource script now allows multiple LVs per VG as long as they move together (exist on the same machine). Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.metadata.diff?cvsroot=cluster&r1=1.1&r2=1.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_lv.sh.diff?cvsroot=cluster&r1=1.1&r2=1.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_vg.sh.diff?cvsroot=cluster&r1=1.1&r2=1.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.sh.diff?cvsroot=cluster&r1=1.11&r2=1.12 --- cluster/rgmanager/src/resources/lvm.metadata 2008/01/03 20:35:39 1.1 +++ cluster/rgmanager/src/resources/lvm.metadata 2008/01/03 21:02:53 1.2 @@ -0,0 +1,73 @@ + + + 1.0 + + + This defines a LVM volume group that is ... + + + + LVM Failover script + + + + + + Descriptive name LVM Volume group + + + Name + + + + + + + If you can see this, your GUI is broken. + + + If you can see this, your GUI is broken. + + + + + + + If you can see this, your GUI is broken. + + + If you can see this, your GUI is broken. + + + + + + + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. + + + Enable NFS lock workarounds + + + + + + + + + + + + + + + + + + + + + + --- cluster/rgmanager/src/resources/lvm_by_lv.sh 2008/01/03 20:35:39 1.1 +++ cluster/rgmanager/src/resources/lvm_by_lv.sh 2008/01/03 21:02:53 1.2 @@ -0,0 +1,331 @@ +#!/bin/bash + +# +# Copyright Red Hat Inc., 2007 +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, +# MA 02139, USA. +# + +# lv_verify +# +# Verify the parameters passed in +# +lv_verify() +{ + # Anything to verify? Perhaps the names? + return $OCF_SUCCESS +} + +# lv_exec_resilient +# +# Sometimes, devices can come back. Their metadata will conflict +# with the good devices that remain. This function filters out those +# failed devices when executing the given command +# +# Finishing with vgscan resets the cache/filter +lv_exec_resilient() +{ + declare command=$1 + declare all_pvs + + ocf_log notice "Making resilient : $command" + + if [ -z $command ]; then + ocf_log err "lv_exec_resilient: Arguments not supplied" + return $OCF_ERR_ARGS + fi + + # pvs will print out only those devices that are valid + # If a device dies and comes back, it will not appear + # in pvs output (but you will get a Warning). + all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`) + + # Now we use those valid devices in a filter which we set up. + # The device will then be activated because there are no + # metadata conflicts. + command=$command" --config devices{filter=[" + for i in ${all_pvs[*]}; do + command=$command'"a|'$i'|",' + done + command=$command"\"r|.*|\"]}" + + ocf_log notice "Resilient command: $command" + if ! $command ; then + ocf_log err "lv_exec_resilient failed" + vgscan + return $OCF_ERR_GENERIC + else + vgscan + return $OCF_SUCCESS + fi +} + +# lv_activate_resilient +# +# Sometimes, devices can come back. Their metadata will conflict +# with the good devices that remain. We must filter out those +# failed devices when trying to reactivate +lv_activate_resilient() +{ + declare action=$1 + declare lv_path=$2 + declare op="-ay" + + if [ -z $action ] || [ -z $lv_path ]; then + ocf_log err "lv_activate_resilient: Arguments not supplied" + return $OCF_ERR_ARGS + fi + + if [ $action != "start" ]; then + op="-an" + fi + + if ! lv_exec_resilient "lvchange $op $lv_path" ; then + ocf_log err "lv_activate_resilient $action failed on $lv_path" + return $OCF_ERR_GENERIC + else + return $OCF_SUCCESS + fi +} + +# lv_status +# +# Is the LV active? +lv_status() +{ + declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + declare dev="/dev/$lv_path" + declare realdev + declare owner + declare my_name + + # + # Check if device is active + # + if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then + return $OCF_ERR_GENERIC + fi + + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." + return $OCF_SUCCESS + fi + + # + # Check if all links/device nodes are present + # + if [ -h "$dev" ]; then + realdev=$(readlink -f $dev) + if [ $? -ne 0 ]; then + ocf_log err "Failed to follow link, $dev" + return $OCF_ERR_ARGS + fi + + if [ ! -b $realdev ]; then + ocf_log err "Device node for $lv_path is not present" + return $OCF_ERR_GENERIC + fi + else + ocf_log err "Symbolic link for $lv_path is not present" + return $OCF_ERR_GENERIC + fi + + # + # Verify that we are the correct owner + # + owner=`lvs -o tags --noheadings $lv_path` + my_name=$(local_node_name) + if [ -z $my_name ]; then + ocf_log err "Unable to determine local machine name" + + # FIXME: I don't really want to fail on 1st offense + return $OCF_SUCCESS + fi + + if [ -z $owner ] || [ $my_name != $owner ]; then + ocf_log err "WARNING: $lv_path should not be active" + ocf_log err "WARNING: $my_name does not own $lv_path" + ocf_log err "WARNING: Attempting shutdown of $lv_path" + + lv_activate_resilient "stop" $lv_path + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# lv_activate_and_tag +lv_activate_and_tag() +{ + declare action=$1 + declare tag=$2 + declare lv_path=$3 + + if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then + ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path" + return $OCF_ERR_ARGS + fi + + if [ $action == "start" ]; then + ocf_log notice "Activating $lv_path" + lvchange --addtag $tag $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Unable to add tag to $lv_path" + return $OCF_ERR_GENERIC + fi + + if ! lv_activate_resilient $action $lv_path; then + ocf_log err "Unable to activate $lv_path" + return $OCF_ERR_GENERIC + fi + else + ocf_log notice "Deactivating $lv_path" + if ! lv_activate_resilient $action $lv_path; then + ocf_log err "Unable to deactivate $lv_path" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Removing ownership tag ($tag) from $lv_path" + + lvchange --deltag $tag $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Unable to delete tag from $lv_path" + return $OCF_ERR_GENERIC + fi + fi + + return $OCF_SUCCESS +} + +# lv_activate +# $1: start/stop only +# +# Basically, if we want to [de]activate an LVM volume, +# we must own it. That means that our tag must be on it. +# This requires a change to /etc/lvm/lvm.conf: +# volume_list = [ "root_volume", "@my_hostname" ] +# where "root_volume" is your root volume group and +# "my_hostname" is $(local_node_name) +# +# If there is a node failure, we may wish to "steal" the +# LV. For that, we need to check if the node that owns +# it is still part of the cluster. We use the tag to +# determine who owns the volume then query for their +# liveness. If they are dead, we can steal. +lv_activate() +{ + declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + declare owner=`lvs -o tags --noheadings $lv_path` + declare my_name=$(local_node_name) + + if [ -z $my_name ]; then + ocf_log err "Unable to determine cluster node name" + return $OCF_ERR_GENERIC + fi + + # + # FIXME: This code block is repeated below... might be + # nice to put it in a function + # + if [ ! -z $owner ] && [ $owner != $my_name ]; then + if is_node_member_clustat $owner ; then + ocf_log err "$owner owns $lv_path unable to $1" + return $OCF_ERR_GENERIC + fi + ocf_log notice "Owner of $lv_path is not in the cluster" + ocf_log notice "Stealing $lv_path" + + lvchange --deltag $owner $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Failed to steal $lv_path from $owner" + return $OCF_ERR_GENERIC + fi + + # Warning --deltag doesn't always result in failure + if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then + ocf_log err "Failed to steal $lv_path from $owner." + return $OCF_ERR_GENERIC + fi + fi + + if ! lv_activate_and_tag $1 $my_name $lv_path; then + ocf_log err "Failed to $1 $lv_path" + + if [ "$1" == "start" ]; then + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if vgreduce --removemissing --config \ + "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ + $OCF_RESKEY_vg_name; then + ocf_log notice "$OCF_RESKEY_vg_name now consistent" + owner=`lvs -o tags --noheadings $lv_path` + if [ ! -z $owner ] && [ $owner != $my_name ]; then + if is_node_member_clustat $owner ; then + ocf_log err "$owner owns $lv_path unable to $1" + return $OCF_ERR_GENERIC + fi + ocf_log notice "Owner of $lv_path is not in the cluster" + ocf_log notice "Stealing $lv_path" + + lvchange --deltag $owner $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Failed to steal $lv_path from $owner" + return $OCF_ERR_GENERIC + fi + + # Warning --deltag doesn't always result in failure + if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then + ocf_log err "Failed to steal $lv_path from $owner." + return $OCF_ERR_GENERIC + fi + fi + + if ! lv_activate_and_tag $1 $my_name $lv_path; then + ocf_log err "Failed second attempt to $1 $lv_path" + return $OCF_ERR_GENERIC + else + ocf_log notice "Second attempt to $1 $lv_path successful" + return $OCF_SUCCESS + fi + else + ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" + return $OCF_ERR_GENERIC + fi + else + ocf_log err "Failed to $1 $lv_path" + return $OCF_ERR_GENERIC + fi + fi + return $OCF_SUCCESS +} + +function lv_start +{ + if ! lv_activate start; then + return 1 + fi + + return 0 +} + +function lv_stop +{ + if ! lv_activate stop; then + return 1 + fi + + return 0 +} --- cluster/rgmanager/src/resources/lvm_by_vg.sh 2008/01/03 20:35:39 1.1 +++ cluster/rgmanager/src/resources/lvm_by_vg.sh 2008/01/03 21:02:53 1.2 @@ -0,0 +1,266 @@ +#!/bin/bash + +# +# Copyright Red Hat Inc., 2007 +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, +# MA 02139, USA. +# + +# vg_owner +# +# Returns: +# 1 == We are the owner +# 2 == We can claim it +# 0 == Owned by someone else +function vg_owner +{ + local owner=`vgs -o tags --noheadings $OCF_RESKEY_vg_name` + local my_name=$(local_node_name) + + if [ -z $my_name ]; then + ocf_log err "Unable to determine cluster node name" + return 0 + fi + + if [ -z $owner ]; then + # No-one owns this VG yet, so we can claim it + return 2 + fi + + if [ $owner != $my_name ]; then + if is_node_member_clustat $owner ; then + return 0 + fi + return 2 + fi + + return 1 +} + +function strip_tags +{ + local i + + for i in `vgs --noheadings -o tags $OCF_RESKEY_vg_name | sed s/","/" "/g`; do + ocf_log info "Stripping tag, $i" + vgchange --deltag $i + done + + if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_vg_name` ]; then + ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +function strip_and_add_tag +{ + if ! strip_tags; then + ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + if ! vgchange --addtag $(local_node_name) $OCF_RESKEY_vg_name; then + ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name" + + return $OCF_SUCCESS +} + +# vg_status +# +# Are all the LVs active? +function vg_status +{ + local i + local dev + local readdev + + # + # Check that all LVs are active + # + for i in `lvs $OCF_RESKEY_vg_name --noheadings -o attr`; do + if [[ ! $i =~ ....a. ]]; then + return $OCF_ERR_GENERIC + fi + done + + # + # Check if all links/device nodes are present + # + for i in `lvs $OCF_RESKEY_vg_name --noheadings -o name`; do + dev="/dev/$OCF_RESKEY_vg_name/$i" + + if [ -h $dev ]; then + realdev=$(readlink -f $dev) + if [ $? -ne 0 ]; then + ocf_log err "Failed to follow link, $dev" + return $OCF_ERR_GENERIC + fi + + if [ ! -b $realdev ]; then + ocf_log err "Device node for $dev is not present" + return $OCF_ERR_GENERIC + fi + else + ocf_log err "Symbolic link for $lv_path is not present" + return $OCF_ERR_GENERIC + fi + done + + # + # Verify that we are the correct owner + # + vg_owner + if [ $? -ne 1 ]; then + ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active" + ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name" + ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name" + + # FIXME: may need more force to shut this down + vgchange -an $OCF_RESKEY_vg_name + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +function vg_verify +{ + # Anything to verify? + return $OCF_SUCCESS +} + +function vg_start +{ + local a + local results + local all_pvs + local resilience + + ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" + + vg_owner + case $? in + 0) + ocf_log info "Someone else owns this volume group" + return $OCF_ERR_GENERIC + ;; + 1) + ocf_log info "I own this volume group" + ;; + 2) + ocf_log info "I can claim this volume group" + ;; + esac + + if ! strip_and_add_tag || + ! vgchange -ay $OCF_RESKEY_vg_name -vvvv >& /tmp/butt; then + ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if ! vgreduce --removemissing --config \ + "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ + $OCF_RESKEY_vg_name; then + + ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" + return $OCF_ERR_GENERIC + fi + + vg_owner + if [ $? -eq 0 ]; then + ocf_log err "Unable to claim ownership of $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + if ! strip_and_add_tag || + ! vgchange -ay $OCF_RESKEY_vg_name; then + ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful" + return $OCF_SUCCESS + else + # The activation commands succeeded, but did they do anything? + # Make sure all the logical volumes are active + results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + all_pvs=(`pvs --noheadings -o name 2> /dev/null`) + resilience=" --config devices{filter=[" + for i in ${all_pvs[*]}; do + resilience=$resilience'"a|'$i'|",' + done + resilience=$resilience"\"r|.*|\"]}" + + vgchange -ay $OCF_RESKEY_vg_name $resilience + break + fi + a=$(($a + 2)) + done + + # We need to check the LVs again if we made the command resilient + if [ ! -z $resilience ]; then + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + ocf_log err "Failed to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" + fi + fi + + return $OCF_SUCCESS +} + +function vg_stop +{ + local a + local results + + # Shut down the volume group + # Do we need to make this resilient? + vgchange -an $OCF_RESKEY_vg_name + + # Make sure all the logical volumes are inactive + results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then + ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + + # Make sure we are the owner before we strip the tags + vg_owner + if [ $? -ne 0 ]; then + strip_tags + fi + + return $OCF_SUCCESS +} --- cluster/rgmanager/src/resources/Makefile 2007/12/22 13:36:59 1.24 +++ cluster/rgmanager/src/resources/Makefile 2008/01/03 21:02:53 1.25 @@ -17,12 +17,12 @@ RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \ script.sh netfs.sh clusterfs.sh smb.sh \ apache.sh openldap.sh samba.sh mysql.sh \ - postgres-8.sh tomcat-5.sh lvm.sh vm.sh \ - SAPInstance SAPDatabase named.sh + postgres-8.sh tomcat-5.sh lvm.sh lvm_by_lv.sh lvm_by_vg.sh \ + vm.sh SAPInstance SAPDatabase named.sh METADATA=apache.metadata openldap.metadata samba.metadata \ mysql.metadata postgres-8.metadata tomcat-5.metadata \ - named.metadata + named.metadata lvm.metadata TARGETS=ocf-shellfuncs svclib_nfslock default_event_script.sl --- cluster/rgmanager/src/resources/lvm.sh 2008/01/03 20:56:49 1.11 +++ cluster/rgmanager/src/resources/lvm.sh 2008/01/03 21:02:53 1.12 @@ -146,7 +146,7 @@ rv=0 ;; -verify-all) +validate-all|verify-all) ## # We can safely ignore clustered volume groups (VGs handled by CLVM) ##