From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 3 Jan 2008 20:35:41 -0000 Subject: [Cluster-devel] cluster/rgmanager/src/resources Makefile No ta ... Message-ID: <20080103203541.17707.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: jbrassow at sourceware.org 2008-01-03 20:35:39 Modified files: rgmanager/src/resources: Makefile No tag lvm.sh Added files: rgmanager/src/resources: lvm.metadata lvm_by_lv.sh lvm_by_vg.sh Log message: BUG 427377 HA LVM now allows multiple LVs/VG as long as they move together Package builder, please note the addition of 3 new files. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.metadata.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=NONE&r2=1.1.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_lv.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=NONE&r2=1.1.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_vg.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=NONE&r2=1.1.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.11&r2=1.4.2.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.9&r2=1.10 --- cluster/rgmanager/src/resources/Makefile 2007/09/25 14:00:42 1.4.2.11 +++ cluster/rgmanager/src/resources/Makefile 2008/01/03 20:35:39 1.4.2.12 @@ -20,10 +20,12 @@ RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \ script.sh netfs.sh clusterfs.sh smb.sh \ apache.sh openldap.sh samba.sh mysql.sh \ - postgres-8.sh tomcat-5.sh lvm.sh SAPInstance SAPDatabase + postgres-8.sh tomcat-5.sh lvm.sh lvm_by_lv.sh lvm_by_vg.sh \ + SAPInstance SAPDatabase METADATA=apache.metadata openldap.metadata samba.metadata \ - mysql.metadata postgres-8.metadata tomcat-5.metadata + mysql.metadata postgres-8.metadata tomcat-5.metadata \ + lvm.metadata TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock --- cluster/rgmanager/src/resources/lvm.sh 2007/07/02 21:58:34 1.9 +++ cluster/rgmanager/src/resources/lvm.sh 2008/01/03 20:35:39 1.10 @@ -21,19 +21,7 @@ # # LVM Failover Script. -# -# This script correctly handles: -# - Relocation -# - Fail-over -# - Disk failure + Fail-over -# If you don't know what those mean, ASK! (jbrassow at redhat.com) # NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation. -# -# This script should handle (but doesn't right now): -# - Operations on VG level. Make lv_name optional. This would have -# the effect of moving all LVs in a VG, not just one LV - - LC_ALL=C LANG=C @@ -42,451 +30,82 @@ . $(dirname $0)/ocf-shellfuncs . $(dirname $0)/utils/member_util.sh +. $(dirname $0)/lvm_by_lv.sh +. $(dirname $0)/lvm_by_vg.sh rv=0 -meta_data() -{ - cat < - - 1.0 - - - This defines a LVM volume group that is ... - - - - LVM Failover script - - - - - - Descriptive name LVM Volume group - - - Name - - - - - - - If you can see this, your GUI is broken. - - - If you can see this, your GUI is broken. - - - - - - - If you can see this, your GUI is broken. - - - If you can see this, your GUI is broken. - - - - - - - If set and unmounting the file system fails, the node will - try to kill lockd and issue reclaims across all remaining - network interface cards. - - - Enable NFS lock workarounds - - - - - - - - - - - - - - - - - - - - - - -EOT -} - -# verify_all -# -# Verify the parameters passed in -# -verify_all() -{ - declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" - declare -i ret=0 - - # Anything to verify? Perhaps the names? - ocf_log notice "Verifying $lv_path" - - return $ret -} - -vg_status() -{ - return $OCF_ERR_GENERIC -} - -vg_activate() -{ - return $OCF_ERR_GENERIC -} - -# lvm_exec_resilient -# -# Sometimes, devices can come back. Their metadata will conflict -# with the good devices that remain. This function filters out those -# failed devices when executing the given command -# -# Finishing with vgscan resets the cache/filter -lvm_exec_resilient() -{ - declare command=$1 - declare all_pvs - - ocf_log notice "Making resilient : $command" - - if [ -z $command ]; then - ocf_log err "lvm_exec_resilient: Arguments not supplied" - return $OCF_ERR_ARGS - fi - - # pvs will print out only those devices that are valid - # If a device dies and comes back, it will not appear - # in pvs output (but you will get a Warning). - all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`) - - # Now we use those valid devices in a filter which we set up. - # The device will then be activated because there are no - # metadata conflicts. - command=$command" --config devices{filter=["; - for i in ${all_pvs[*]}; do - command=$command'"a|'$i'|",' - done - command=$command"\"r|.*|\"]}" - - ocf_log notice "Resilient command: $command" - if ! $command ; then - ocf_log err "lvm_exec_resilient failed" - vgscan - return $OCF_ERR_GENERIC - else - vgscan - return $OCF_SUCCESS - fi -} - -# lv_activate_resilient -# -# Sometimes, devices can come back. Their metadata will conflict -# with the good devices that remain. We must filter out those -# failed devices when trying to reactivate -lv_activate_resilient() -{ - declare action=$1 - declare lv_path=$2 - declare op="-ay" - - if [ -z $action ] || [ -z $lv_path ]; then - ocf_log err "lv_activate_resilient: Arguments not supplied" - return $OCF_ERR_ARGS - fi - - if [ $action != "start" ]; then - op="-an" - fi - - if ! lvm_exec_resilient "lvchange $op $lv_path" ; then - ocf_log err "lv_activate_resilient $action failed on $lv_path" - return $OCF_ERR_GENERIC - else - return $OCF_SUCCESS - fi -} - -# lv_status +################################################################################ +# clvm_check # -# Is the LV active? -lv_status() -{ - declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" - declare dev="/dev/$lv_path" - declare realdev - declare owner - declare my_name - - # - # Check if device is active - # - if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then - return $OCF_ERR_GENERIC - fi - - if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then - ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." - return $OCF_SUCCESS - fi - - # - # Check if all links/device nodes are present - # - if [ -h "$dev" ]; then - realdev=$(readlink -f $dev) - if [ $? -ne 0 ]; then - ocf_log err "Failed to follow link, $dev" - return $OCF_ERR_ARGS - fi - - if [ ! -b $realdev ]; then - ocf_log err "Device node for $lv_path is not present" - return $OCF_ERR_GENERIC - fi - else - ocf_log err "Symbolic link for $lv_path is not present" - return $OCF_ERR_GENERIC - fi - - # - # Verify that we are the correct owner - # - owner=`lvs -o tags --noheadings $lv_path` - my_name=$(local_node_name) - if [ -z $my_name ]; then - ocf_log err "Unable to determine local machine name" - - # FIXME: I don't really want to fail on 1st offense - return $OCF_SUCCESS - fi - - if [ -z $owner ] || [ $my_name != $owner ]; then - ocf_log err "WARNING: $lv_path should not be active" - ocf_log err "WARNING: $my_name does not own $lv_path" - ocf_log err "WARNING: Attempting shutdown of $lv_path" - - lv_activate_resilient "stop" $lv_path - return $OCF_ERR_GENERIC - fi - - return $OCF_SUCCESS -} - -# lv_activate_and_tag -lv_activate_and_tag() +################################################################################ +function clvm_check { - declare action=$1 - declare tag=$2 - declare lv_path=$3 - - if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then - ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path" - return $OCF_ERR_ARGS - fi - - if [ $action == "start" ]; then - ocf_log notice "Activating $lv_path" - lvchange --addtag $tag $lv_path - if [ $? -ne 0 ]; then - ocf_log err "Unable to add tag to $lv_path" - return $OCF_ERR_GENERIC - fi - - if ! lv_activate_resilient $action $lv_path; then - ocf_log err "Unable to activate $lv_path" - return $OCF_ERR_GENERIC - fi - else - ocf_log notice "Deactivating $lv_path" - if ! lv_activate_resilient $action $lv_path; then - ocf_log err "Unable to deactivate $lv_path" - return $OCF_ERR_GENERIC - fi - - ocf_log notice "Removing ownership tag ($tag) from $lv_path" - - lvchange --deltag $tag $lv_path - if [ $? -ne 0 ]; then - ocf_log err "Unable to delete tag from $lv_path" - return $OCF_ERR_GENERIC - fi + if [[ $(vgs -o attr --noheadings $1) =~ .....c ]]; then + return 1 fi - return $OCF_SUCCESS + return 0 } -# lv_activate -# $1: start/stop only -# -# Basically, if we want to [de]activate an LVM volume, -# we must own it. That means that our tag must be on it. -# This requires a change to /etc/lvm/lvm.conf: -# volume_list = [ "root_volume", "@my_hostname" ] -# where "root_volume" is your root volume group and -# "my_hostname" is $(local_node_name) +################################################################################ +# ha_lvm_proper_setup_check # -# If there is a node failure, we may wish to "steal" the -# LV. For that, we need to check if the node that owns -# it is still part of the cluster. We use the tag to -# determine who owns the volume then query for their -# liveness. If they are dead, we can steal. -lv_activate() -{ - declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" - declare owner=`lvs -o tags --noheadings $lv_path` - declare my_name=$(local_node_name) - - if [ -z $my_name ]; then - ocf_log err "Unable to determine cluster node name" - return $OCF_ERR_GENERIC - fi - - # - # FIXME: This code block is repeated below... might be - # nice to put it in a function - # - if [ ! -z $owner ] && [ $owner != $my_name ]; then - if is_node_member_clustat $owner ; then - ocf_log err "$owner owns $lv_path unable to $1" - return $OCF_ERR_GENERIC - fi - ocf_log notice "Owner of $lv_path is not in the cluster" - ocf_log notice "Stealing $lv_path" - - lvchange --deltag $owner $lv_path - if [ $? -ne 0 ]; then - ocf_log err "Failed to steal $lv_path from $owner" - return $OCF_ERR_GENERIC - fi - - # Warning --deltag doesn't always result in failure - if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then - ocf_log err "Failed to steal $lv_path from $owner." - return $OCF_ERR_GENERIC - fi - fi - - if ! lv_activate_and_tag $1 $my_name $lv_path; then - ocf_log err "Failed to $1 $lv_path" - - if [ "$1" == "start" ]; then - ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" - - if vgreduce --removemissing --config \ - "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ - $OCF_RESKEY_vg_name; then - ocf_log notice "$OCF_RESKEY_vg_name now consistent" - owner=`lvs -o tags --noheadings $lv_path` - if [ ! -z $owner ] && [ $owner != $my_name ]; then - if is_node_member_clustat $owner ; then - ocf_log err "$owner owns $lv_path unable to $1" - return $OCF_ERR_GENERIC - fi - ocf_log notice "Owner of $lv_path is not in the cluster" - ocf_log notice "Stealing $lv_path" - - lvchange --deltag $owner $lv_path - if [ $? -ne 0 ]; then - ocf_log err "Failed to steal $lv_path from $owner" - return $OCF_ERR_GENERIC - fi - - # Warning --deltag doesn't always result in failure - if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then - ocf_log err "Failed to steal $lv_path from $owner." - return $OCF_ERR_GENERIC - fi - fi - - if ! lv_activate_and_tag $1 $my_name $lv_path; then - ocf_log err "Failed second attempt to $1 $lv_path" - return $OCF_ERR_GENERIC - else - ocf_log notice "Second attempt to $1 $lv_path successful" - return $OCF_SUCCESS - fi - else - ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" - return $OCF_ERR_GENERIC - fi - else - ocf_log err "Failed to $1 $lv_path" - return $OCF_ERR_GENERIC - fi - fi - return $OCF_SUCCESS -} - -ha_lvm_proper_setup_check() +################################################################################ +function ha_lvm_proper_setup_check { - # First, let's check that they have setup their lvm.conf correctly + ## + # Machine's cluster node name must be present as + # a tag in lvm.conf:activation/volume_list + ## if ! lvm dumpconfig activation/volume_list >& /dev/null || ! lvm dumpconfig activation/volume_list | grep $(local_node_name); then ocf_log err "lvm.conf improperly configured for HA LVM." return $OCF_ERR_GENERIC fi + ## # Next, we need to ensure that their initrd has been updated - if [ -e /boot/initrd-`uname -r`.img ]; then - if [ "$(find /boot/initrd-`uname -r`.img -newer /etc/lvm/lvm.conf)" == "" ]; then - ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf" - return $OCF_ERR_GENERIC - fi - else - # Best guess... - if [ "$(find /boot/*.img -newer /etc/lvm/lvm.conf)" == "" ]; then - ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf" - return $OCF_ERR_GENERIC - fi + # If not, the machine could boot and activate the VG outside + # the control of rgmanager + ## + # Fixme: we might be able to perform a better check... + if [ "$(find /boot/*.img -newer /etc/lvm/lvm.conf)" == "" ]; then + ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf" + return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } +################################################################################ +# MAIN +################################################################################ + case $1 in start) - if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + ## + # We can safely ignore clustered volume groups (VGs handled by CLVM) + ## + if ! clvm_check $OCF_RESKEY_vg_name; then ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." exit 0 fi - if ! lvs $OCF_RESKEY_vg_name >& /dev/null; then - lv_count=0 - else - lv_count=`lvs --noheadings -o name $OCF_RESKEY_vg_name | grep -v _mlog | grep -v _mimage | grep -v nconsistent | wc -l` - fi - if [ $lv_count -gt 1 ]; then - ocf_log err "HA LVM requires Only one logical volume per volume group." - ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name" - ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" - exit $OCF_ERR_GENERIC - fi ha_lvm_proper_setup_check || exit 1 - + + rv=0 + if [ -z $OCF_RESKEY_lv_name ]; then - vg_activate start || exit 1 + vg_start || exit 1 else - lv_activate start || exit 1 + lv_start || exit 1 fi - rv=0 ;; status|monitor) + ocf_log notice "Getting status" + if [ -z $OCF_RESKEY_lv_name ]; then vg_status || exit 1 else @@ -496,7 +115,10 @@ ;; stop) - if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + ## + # We can safely ignore clustered volume groups (VGs handled by CLVM) + ## + if ! clvm_check $OCF_RESKEY_vg_name; then ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." exit 0 fi @@ -506,9 +128,9 @@ fi if [ -z $OCF_RESKEY_lv_name ]; then - vg_activate stop || exit 1 + vg_stop || exit 1 else - lv_activate stop || exit 1 + lv_stop || exit 1 fi rv=0 ;; @@ -520,18 +142,25 @@ ;; meta-data) - meta_data + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` rv=0 ;; validate-all) - if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + ## + # We can safely ignore clustered volume groups (VGs handled by CLVM) + ## + if ! clvm_check $OCF_RESKEY_vg_name; then ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." exit 0 fi - verify_all - rv=$? + if [ -z $OCF_RESKEY_lv_name ]; then + vg_verify || exit 1 + else + lv_verify || exit 1 + fi + rv=0 ;; *) echo "usage: $0 {start|status|monitor|stop|restart|meta-data|validate-all}"