* [Cluster-devel] cluster/rgmanager/src/resources Makefile lvm.s ...
@ 2008-01-03 20:45 jbrassow
0 siblings, 0 replies; 2+ messages in thread
From: jbrassow @ 2008-01-03 20:45 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: jbrassow at sourceware.org 2008-01-03 20:45:37
Modified files:
rgmanager/src/resources: Makefile lvm.sh
Added files:
rgmanager/src/resources: lvm.metadata lvm_by_lv.sh lvm_by_vg.sh
Log message:
Bug 427378
HA LVM now allows multiple LVs/VG as long as they move together
Package builder, note the extra files.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.metadata.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_lv.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_vg.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.13.2.7&r2=1.13.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.6.6&r2=1.1.6.7
--- cluster/rgmanager/src/resources/Makefile 2007/12/04 21:59:54 1.13.2.7
+++ cluster/rgmanager/src/resources/Makefile 2008/01/03 20:45:37 1.13.2.8
@@ -20,12 +20,12 @@
RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
script.sh netfs.sh clusterfs.sh smb.sh \
apache.sh openldap.sh samba.sh mysql.sh \
- postgres-8.sh tomcat-5.sh lvm.sh vm.sh \
- SAPInstance SAPDatabase named.sh
+ postgres-8.sh tomcat-5.sh lvm.sh lvm_by_lv.sh lvm_by_vg.sh \
+ vm.sh SAPInstance SAPDatabase named.sh
METADATA=apache.metadata openldap.metadata samba.metadata \
mysql.metadata postgres-8.metadata tomcat-5.metadata \
- named.metadata
+ named.metadata lvm.metadata
TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
--- cluster/rgmanager/src/resources/lvm.sh 2007/07/02 21:59:04 1.1.6.6
+++ cluster/rgmanager/src/resources/lvm.sh 2008/01/03 20:45:37 1.1.6.7
@@ -21,19 +21,7 @@
#
# LVM Failover Script.
-#
-# This script correctly handles:
-# - Relocation
-# - Fail-over
-# - Disk failure + Fail-over
-# If you don't know what those mean, ASK! (jbrassow at redhat.com)
# NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation.
-#
-# This script should handle (but doesn't right now):
-# - Operations on VG level. Make lv_name optional. This would have
-# the effect of moving all LVs in a VG, not just one LV
-
-
LC_ALL=C
LANG=C
@@ -42,451 +30,82 @@
. $(dirname $0)/ocf-shellfuncs
. $(dirname $0)/utils/member_util.sh
+. $(dirname $0)/lvm_by_lv.sh
+. $(dirname $0)/lvm_by_vg.sh
rv=0
-meta_data()
-{
- cat <<EOT
-<?xml version="1.0" ?>
-<resource-agent name="lvm" version="rgmanager 2.0">
- <version>1.0</version>
-
- <longdesc lang="en">
- This defines a LVM volume group that is ...
- </longdesc>
-
- <shortdesc lang="en">
- LVM Failover script
- </shortdesc>
-
- <parameters>
- <parameter name="name" primary="1">
- <longdesc lang="en">
- Descriptive name LVM Volume group
- </longdesc>
- <shortdesc lang="en">
- Name
- </shortdesc>
- <content type="string"/>
- </parameter>
-
- <parameter name="vg_name" required="1">
- <longdesc lang="en">
- If you can see this, your GUI is broken.
- </longdesc>
- <shortdesc lang="en">
- If you can see this, your GUI is broken.
- </shortdesc>
- <content type="string"/>
- </parameter>
-
- <parameter name="lv_name" required="1">
- <longdesc lang="en">
- If you can see this, your GUI is broken.
- </longdesc>
- <shortdesc lang="en">
- If you can see this, your GUI is broken.
- </shortdesc>
- <content type="string"/>
- </parameter>
-
- <parameter name="nfslock" inherit="service%nfslock">
- <longdesc lang="en">
- If set and unmounting the file system fails, the node will
- try to kill lockd and issue reclaims across all remaining
- network interface cards.
- </longdesc>
- <shortdesc lang="en">
- Enable NFS lock workarounds
- </shortdesc>
- <content type="boolean"/>
- </parameter>
-
- </parameters>
-
- <actions>
- <action name="start" timeout="5"/>
- <action name="stop" timeout="5"/>
-
- <action name="status" timeout="5" interval="1h"/>
- <action name="monitor" timeout="5" interval="1h"/>
-
- <action name="meta-data" timeout="5"/>
- <action name="verify-all" timeout="30"/>
- </actions>
-
- <special tag="rgmanager">
- <attributes maxinstances="1"/>
- </special>
-
-</resource-agent>
-EOT
-}
-
-# verify_all
-#
-# Verify the parameters passed in
-#
-verify_all()
-{
- declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
- declare -i ret=0
-
- # Anything to verify? Perhaps the names?
- ocf_log notice "Verifying $lv_path"
-
- return $ret
-}
-
-vg_status()
-{
- return $OCF_ERR_GENERIC
-}
-
-vg_activate()
-{
- return $OCF_ERR_GENERIC
-}
-
-# lvm_exec_resilient
-#
-# Sometimes, devices can come back. Their metadata will conflict
-# with the good devices that remain. This function filters out those
-# failed devices when executing the given command
-#
-# Finishing with vgscan resets the cache/filter
-lvm_exec_resilient()
-{
- declare command=$1
- declare all_pvs
-
- ocf_log notice "Making resilient : $command"
-
- if [ -z $command ]; then
- ocf_log err "lvm_exec_resilient: Arguments not supplied"
- return $OCF_ERR_ARGS
- fi
-
- # pvs will print out only those devices that are valid
- # If a device dies and comes back, it will not appear
- # in pvs output (but you will get a Warning).
- all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`)
-
- # Now we use those valid devices in a filter which we set up.
- # The device will then be activated because there are no
- # metadata conflicts.
- command=$command" --config devices{filter=[";
- for i in ${all_pvs[*]}; do
- command=$command'"a|'$i'|",'
- done
- command=$command"\"r|.*|\"]}"
-
- ocf_log notice "Resilient command: $command"
- if ! $command ; then
- ocf_log err "lvm_exec_resilient failed"
- vgscan
- return $OCF_ERR_GENERIC
- else
- vgscan
- return $OCF_SUCCESS
- fi
-}
-
-# lv_activate_resilient
-#
-# Sometimes, devices can come back. Their metadata will conflict
-# with the good devices that remain. We must filter out those
-# failed devices when trying to reactivate
-lv_activate_resilient()
-{
- declare action=$1
- declare lv_path=$2
- declare op="-ay"
-
- if [ -z $action ] || [ -z $lv_path ]; then
- ocf_log err "lv_activate_resilient: Arguments not supplied"
- return $OCF_ERR_ARGS
- fi
-
- if [ $action != "start" ]; then
- op="-an"
- fi
-
- if ! lvm_exec_resilient "lvchange $op $lv_path" ; then
- ocf_log err "lv_activate_resilient $action failed on $lv_path"
- return $OCF_ERR_GENERIC
- else
- return $OCF_SUCCESS
- fi
-}
-
-# lv_status
+################################################################################
+# clvm_check
#
-# Is the LV active?
-lv_status()
-{
- declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
- declare dev="/dev/$lv_path"
- declare realdev
- declare owner
- declare my_name
-
- #
- # Check if device is active
- #
- if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then
- return $OCF_ERR_GENERIC
- fi
-
- if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
- ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
- return $OCF_SUCCESS
- fi
-
- #
- # Check if all links/device nodes are present
- #
- if [ -h "$dev" ]; then
- realdev=$(readlink -f $dev)
- if [ $? -ne 0 ]; then
- ocf_log err "Failed to follow link, $dev"
- return $OCF_ERR_ARGS
- fi
-
- if [ ! -b $realdev ]; then
- ocf_log err "Device node for $lv_path is not present"
- return $OCF_ERR_GENERIC
- fi
- else
- ocf_log err "Symbolic link for $lv_path is not present"
- return $OCF_ERR_GENERIC
- fi
-
- #
- # Verify that we are the correct owner
- #
- owner=`lvs -o tags --noheadings $lv_path`
- my_name=$(local_node_name)
- if [ -z $my_name ]; then
- ocf_log err "Unable to determine local machine name"
-
- # FIXME: I don't really want to fail on 1st offense
- return $OCF_SUCCESS
- fi
-
- if [ -z $owner ] || [ $my_name != $owner ]; then
- ocf_log err "WARNING: $lv_path should not be active"
- ocf_log err "WARNING: $my_name does not own $lv_path"
- ocf_log err "WARNING: Attempting shutdown of $lv_path"
-
- lv_activate_resilient "stop" $lv_path
- return $OCF_ERR_GENERIC
- fi
-
- return $OCF_SUCCESS
-}
-
-# lv_activate_and_tag
-lv_activate_and_tag()
+################################################################################
+function clvm_check
{
- declare action=$1
- declare tag=$2
- declare lv_path=$3
-
- if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then
- ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path"
- return $OCF_ERR_ARGS
- fi
-
- if [ $action == "start" ]; then
- ocf_log notice "Activating $lv_path"
- lvchange --addtag $tag $lv_path
- if [ $? -ne 0 ]; then
- ocf_log err "Unable to add tag to $lv_path"
- return $OCF_ERR_GENERIC
- fi
-
- if ! lv_activate_resilient $action $lv_path; then
- ocf_log err "Unable to activate $lv_path"
- return $OCF_ERR_GENERIC
- fi
- else
- ocf_log notice "Deactivating $lv_path"
- if ! lv_activate_resilient $action $lv_path; then
- ocf_log err "Unable to deactivate $lv_path"
- return $OCF_ERR_GENERIC
- fi
-
- ocf_log notice "Removing ownership tag ($tag) from $lv_path"
-
- lvchange --deltag $tag $lv_path
- if [ $? -ne 0 ]; then
- ocf_log err "Unable to delete tag from $lv_path"
- return $OCF_ERR_GENERIC
- fi
+ if [[ $(vgs -o attr --noheadings $1) =~ .....c ]]; then
+ return 1
fi
- return $OCF_SUCCESS
+ return 0
}
-# lv_activate
-# $1: start/stop only
-#
-# Basically, if we want to [de]activate an LVM volume,
-# we must own it. That means that our tag must be on it.
-# This requires a change to /etc/lvm/lvm.conf:
-# volume_list = [ "root_volume", "@my_hostname" ]
-# where "root_volume" is your root volume group and
-# "my_hostname" is $(local_node_name)
+################################################################################
+# ha_lvm_proper_setup_check
#
-# If there is a node failure, we may wish to "steal" the
-# LV. For that, we need to check if the node that owns
-# it is still part of the cluster. We use the tag to
-# determine who owns the volume then query for their
-# liveness. If they are dead, we can steal.
-lv_activate()
-{
- declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
- declare owner=`lvs -o tags --noheadings $lv_path`
- declare my_name=$(local_node_name)
-
- if [ -z $my_name ]; then
- ocf_log err "Unable to determine cluster node name"
- return $OCF_ERR_GENERIC
- fi
-
- #
- # FIXME: This code block is repeated below... might be
- # nice to put it in a function
- #
- if [ ! -z $owner ] && [ $owner != $my_name ]; then
- if is_node_member_clustat $owner ; then
- ocf_log err "$owner owns $lv_path unable to $1"
- return $OCF_ERR_GENERIC
- fi
- ocf_log notice "Owner of $lv_path is not in the cluster"
- ocf_log notice "Stealing $lv_path"
-
- lvchange --deltag $owner $lv_path
- if [ $? -ne 0 ]; then
- ocf_log err "Failed to steal $lv_path from $owner"
- return $OCF_ERR_GENERIC
- fi
-
- # Warning --deltag doesn't always result in failure
- if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then
- ocf_log err "Failed to steal $lv_path from $owner."
- return $OCF_ERR_GENERIC
- fi
- fi
-
- if ! lv_activate_and_tag $1 $my_name $lv_path; then
- ocf_log err "Failed to $1 $lv_path"
-
- if [ "$1" == "start" ]; then
- ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
-
- if vgreduce --removemissing --config \
- "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
- $OCF_RESKEY_vg_name; then
- ocf_log notice "$OCF_RESKEY_vg_name now consistent"
- owner=`lvs -o tags --noheadings $lv_path`
- if [ ! -z $owner ] && [ $owner != $my_name ]; then
- if is_node_member_clustat $owner ; then
- ocf_log err "$owner owns $lv_path unable to $1"
- return $OCF_ERR_GENERIC
- fi
- ocf_log notice "Owner of $lv_path is not in the cluster"
- ocf_log notice "Stealing $lv_path"
-
- lvchange --deltag $owner $lv_path
- if [ $? -ne 0 ]; then
- ocf_log err "Failed to steal $lv_path from $owner"
- return $OCF_ERR_GENERIC
- fi
-
- # Warning --deltag doesn't always result in failure
- if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then
- ocf_log err "Failed to steal $lv_path from $owner."
- return $OCF_ERR_GENERIC
- fi
- fi
-
- if ! lv_activate_and_tag $1 $my_name $lv_path; then
- ocf_log err "Failed second attempt to $1 $lv_path"
- return $OCF_ERR_GENERIC
- else
- ocf_log notice "Second attempt to $1 $lv_path successful"
- return $OCF_SUCCESS
- fi
- else
- ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
- return $OCF_ERR_GENERIC
- fi
- else
- ocf_log err "Failed to $1 $lv_path"
- return $OCF_ERR_GENERIC
- fi
- fi
- return $OCF_SUCCESS
-}
-
-ha_lvm_proper_setup_check()
+################################################################################
+function ha_lvm_proper_setup_check
{
- # First, let's check that they have setup their lvm.conf correctly
+ ##
+ # Machine's cluster node name must be present as
+ # a tag in lvm.conf:activation/volume_list
+ ##
if ! lvm dumpconfig activation/volume_list >& /dev/null ||
! lvm dumpconfig activation/volume_list | grep $(local_node_name); then
ocf_log err "lvm.conf improperly configured for HA LVM."
return $OCF_ERR_GENERIC
fi
+ ##
# Next, we need to ensure that their initrd has been updated
- if [ -e /boot/initrd-`uname -r`.img ]; then
- if [ "$(find /boot/initrd-`uname -r`.img -newer /etc/lvm/lvm.conf)" == "" ]; then
- ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf"
- return $OCF_ERR_GENERIC
- fi
- else
- # Best guess...
- if [ "$(find /boot/*.img -newer /etc/lvm/lvm.conf)" == "" ]; then
- ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf"
- return $OCF_ERR_GENERIC
- fi
+ # If not, the machine could boot and activate the VG outside
+ # the control of rgmanager
+ ##
+ # Fixme: we might be able to perform a better check...
+ if [ "$(find /boot/*.img -newer /etc/lvm/lvm.conf)" == "" ]; then
+ ocf_log err "HA LVM requires the initrd image to be newer than lvm.conf"
+ return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
+################################################################################
+# MAIN
+################################################################################
+
case $1 in
start)
- if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
+ ##
+ # We can safely ignore clustered volume groups (VGs handled by CLVM)
+ ##
+ if ! clvm_check $OCF_RESKEY_vg_name; then
ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
exit 0
fi
- if ! lvs $OCF_RESKEY_vg_name >& /dev/null; then
- lv_count=0
- else
- lv_count=`lvs --noheadings -o name $OCF_RESKEY_vg_name | grep -v _mlog | grep -v _mimage | grep -v nconsistent | wc -l`
- fi
- if [ $lv_count -gt 1 ]; then
- ocf_log err "HA LVM requires Only one logical volume per volume group."
- ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name"
- ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
- exit $OCF_ERR_GENERIC
- fi
ha_lvm_proper_setup_check || exit 1
-
+
+ rv=0
+
if [ -z $OCF_RESKEY_lv_name ]; then
- vg_activate start || exit 1
+ vg_start || exit 1
else
- lv_activate start || exit 1
+ lv_start || exit 1
fi
- rv=0
;;
status|monitor)
+ ocf_log notice "Getting status"
+
if [ -z $OCF_RESKEY_lv_name ]; then
vg_status || exit 1
else
@@ -496,7 +115,10 @@
;;
stop)
- if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
+ ##
+ # We can safely ignore clustered volume groups (VGs handled by CLVM)
+ ##
+ if ! clvm_check $OCF_RESKEY_vg_name; then
ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
exit 0
fi
@@ -506,9 +128,9 @@
fi
if [ -z $OCF_RESKEY_lv_name ]; then
- vg_activate stop || exit 1
+ vg_stop || exit 1
else
- lv_activate stop || exit 1
+ lv_stop || exit 1
fi
rv=0
;;
@@ -520,22 +142,29 @@
;;
meta-data)
- meta_data
+ cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'`
rv=0
;;
verify-all)
- if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
+ ##
+ # We can safely ignore clustered volume groups (VGs handled by CLVM)
+ ##
+ if ! clvm_check $OCF_RESKEY_vg_name; then
ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
exit 0
fi
- verify_all
- rv=$?
+ if [ -z $OCF_RESKEY_lv_name ]; then
+ vg_verify || exit 1
+ else
+ lv_verify || exit 1
+ fi
+ rv=0
;;
*)
echo "usage: $0 {start|status|monitor|stop|restart|meta-data|verify-all}"
- exit $OCF_ERR_GENERIC
+ exit $OCF_ERR_UNIMPLEMENTED
;;
esac
^ permalink raw reply [flat|nested] 2+ messages in thread* [Cluster-devel] cluster/rgmanager/src/resources Makefile lvm.s ...
@ 2008-01-03 21:02 jbrassow
0 siblings, 0 replies; 2+ messages in thread
From: jbrassow @ 2008-01-03 21:02 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: jbrassow at sourceware.org 2008-01-03 21:02:53
Modified files:
rgmanager/src/resources: Makefile lvm.sh
Added files:
rgmanager/src/resources: lvm.metadata lvm_by_lv.sh lvm_by_vg.sh
Log message:
lvm resource script now allows multiple LVs per VG as long as they move
together (exist on the same machine).
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.metadata.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_lv.sh.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm_by_vg.sh.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/lvm.sh.diff?cvsroot=cluster&r1=1.11&r2=1.12
--- cluster/rgmanager/src/resources/lvm.metadata 2008/01/03 20:35:39 1.1
+++ cluster/rgmanager/src/resources/lvm.metadata 2008/01/03 21:02:53 1.2
@@ -0,0 +1,73 @@
+<?xml version="1.0" ?>
+<resource-agent name="lvm" version="rgmanager 2.0">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ This defines a LVM volume group that is ...
+ </longdesc>
+
+ <shortdesc lang="en">
+ LVM Failover script
+ </shortdesc>
+
+ <parameters>
+ <parameter name="name" primary="1">
+ <longdesc lang="en">
+ Descriptive name LVM Volume group
+ </longdesc>
+ <shortdesc lang="en">
+ Name
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="vg_name" required="1">
+ <longdesc lang="en">
+ If you can see this, your GUI is broken.
+ </longdesc>
+ <shortdesc lang="en">
+ If you can see this, your GUI is broken.
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="lv_name">
+ <longdesc lang="en">
+ If you can see this, your GUI is broken.
+ </longdesc>
+ <shortdesc lang="en">
+ If you can see this, your GUI is broken.
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="nfslock" inherit="service%nfslock">
+ <longdesc lang="en">
+ If set and unmounting the file system fails, the node will
+ try to kill lockd and issue reclaims across all remaining
+ network interface cards.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable NFS lock workarounds
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
+ </parameters>
+
+ <actions>
+ <action name="start" timeout="5"/>
+ <action name="stop" timeout="5"/>
+
+ <action name="status" timeout="5" interval="1h"/>
+ <action name="monitor" timeout="5" interval="1h"/>
+
+ <action name="meta-data" timeout="5"/>
+ <action name="verify-all" timeout="30"/>
+ </actions>
+
+ <special tag="rgmanager">
+ <attributes maxinstances="1"/>
+ </special>
+
+</resource-agent>
--- cluster/rgmanager/src/resources/lvm_by_lv.sh 2008/01/03 20:35:39 1.1
+++ cluster/rgmanager/src/resources/lvm_by_lv.sh 2008/01/03 21:02:53 1.2
@@ -0,0 +1,331 @@
+#!/bin/bash
+
+#
+# Copyright Red Hat Inc., 2007
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+# MA 02139, USA.
+#
+
+# lv_verify
+#
+# Verify the parameters passed in
+#
+# Currently a placeholder: no checks are performed and $OCF_SUCCESS is
+# always returned.
+#
+lv_verify()
+{
+ # Anything to verify? Perhaps the names?
+ return $OCF_SUCCESS
+}
+
+# lv_exec_resilient
+#
+# Sometimes, devices can come back. Their metadata will conflict
+# with the good devices that remain. This function filters out those
+# failed devices when executing the given command
+#
+# Finishing with vgscan resets the cache/filter
+lv_exec_resilient()
+{
+ declare command=$1
+ declare all_pvs
+
+ ocf_log notice "Making resilient : $command"
+
+ if [ -z $command ]; then
+ ocf_log err "lv_exec_resilient: Arguments not supplied"
+ return $OCF_ERR_ARGS
+ fi
+
+ # pvs will print out only those devices that are valid
+ # If a device dies and comes back, it will not appear
+ # in pvs output (but you will get a Warning).
+ all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`)
+
+ # Now we use those valid devices in a filter which we set up.
+ # The device will then be activated because there are no
+ # metadata conflicts.
+ command=$command" --config devices{filter=["
+ for i in ${all_pvs[*]}; do
+ command=$command'"a|'$i'|",'
+ done
+ command=$command"\"r|.*|\"]}"
+
+ ocf_log notice "Resilient command: $command"
+ if ! $command ; then
+ ocf_log err "lv_exec_resilient failed"
+ vgscan
+ return $OCF_ERR_GENERIC
+ else
+ vgscan
+ return $OCF_SUCCESS
+ fi
+}
+
+# lv_activate_resilient
+#
+# Sometimes, devices can come back. Their metadata will conflict
+# with the good devices that remain. We must filter out those
+# failed devices when trying to reactivate
+lv_activate_resilient()
+{
+ declare action=$1
+ declare lv_path=$2
+ declare op="-ay"
+
+ if [ -z $action ] || [ -z $lv_path ]; then
+ ocf_log err "lv_activate_resilient: Arguments not supplied"
+ return $OCF_ERR_ARGS
+ fi
+
+ if [ $action != "start" ]; then
+ op="-an"
+ fi
+
+ if ! lv_exec_resilient "lvchange $op $lv_path" ; then
+ ocf_log err "lv_activate_resilient $action failed on $lv_path"
+ return $OCF_ERR_GENERIC
+ else
+ return $OCF_SUCCESS
+ fi
+}
+
+# lv_status
+#
+# Is the LV active?
+#
+# Operates on $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name.
+#
+# Returns:
+# $OCF_SUCCESS == the LV is active, its device nodes exist and it
+# carries this node's tag; also returned for a clustered
+# VG and when the local node name cannot be determined
+# $OCF_ERR_ARGS == the /dev symlink could not be resolved
+# $OCF_ERR_GENERIC == the LV is inactive, a link/device node is missing,
+# or this node is not the tagged owner
+lv_status()
+{
+ declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
+ declare dev="/dev/$lv_path"
+ declare realdev
+ declare owner
+ declare my_name
+
+ #
+ # Check if device is active
+ #
+ if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ # A 'c' in the sixth position of the VG attributes is reported as a
+ # cluster volume; nothing further is checked in that case
+ if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
+ ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
+ return $OCF_SUCCESS
+ fi
+
+ #
+ # Check if all links/device nodes are present
+ #
+ if [ -h "$dev" ]; then
+ realdev=$(readlink -f $dev)
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to follow link, $dev"
+ return $OCF_ERR_ARGS
+ fi
+
+ if [ ! -b $realdev ]; then
+ ocf_log err "Device node for $lv_path is not present"
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ ocf_log err "Symbolic link for $lv_path is not present"
+ return $OCF_ERR_GENERIC
+ fi
+
+ #
+ # Verify that we are the correct owner
+ #
+ owner=`lvs -o tags --noheadings $lv_path`
+ my_name=$(local_node_name)
+ if [ -z $my_name ]; then
+ ocf_log err "Unable to determine local machine name"
+
+ # FIXME: I don't really want to fail on 1st offense
+ return $OCF_SUCCESS
+ fi
+
+ # NOTE(review): $owner is expanded unquoted; presumably this relies on
+ # word splitting to drop whitespace around the lvs output - confirm
+ # before adding quotes here.
+ if [ -z $owner ] || [ $my_name != $owner ]; then
+ ocf_log err "WARNING: $lv_path should not be active"
+ ocf_log err "WARNING: $my_name does not own $lv_path"
+ ocf_log err "WARNING: Attempting shutdown of $lv_path"
+
+ # The result of the shutdown attempt is not checked; the status
+ # call fails either way
+ lv_activate_resilient "stop" $lv_path
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# lv_activate_and_tag
+lv_activate_and_tag()
+{
+ declare action=$1
+ declare tag=$2
+ declare lv_path=$3
+
+ if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then
+ ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path"
+ return $OCF_ERR_ARGS
+ fi
+
+ if [ $action == "start" ]; then
+ ocf_log notice "Activating $lv_path"
+ lvchange --addtag $tag $lv_path
+ if [ $? -ne 0 ]; then
+ ocf_log err "Unable to add tag to $lv_path"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if ! lv_activate_resilient $action $lv_path; then
+ ocf_log err "Unable to activate $lv_path"
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ ocf_log notice "Deactivating $lv_path"
+ if ! lv_activate_resilient $action $lv_path; then
+ ocf_log err "Unable to deactivate $lv_path"
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log notice "Removing ownership tag ($tag) from $lv_path"
+
+ lvchange --deltag $tag $lv_path
+ if [ $? -ne 0 ]; then
+ ocf_log err "Unable to delete tag from $lv_path"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# lv_activate
+# $1: start/stop only
+#
+# Basically, if we want to [de]activate an LVM volume,
+# we must own it. That means that our tag must be on it.
+# This requires a change to /etc/lvm/lvm.conf:
+# volume_list = [ "root_volume", "@my_hostname" ]
+# where "root_volume" is your root volume group and
+# "my_hostname" is $(local_node_name)
+#
+# If there is a node failure, we may wish to "steal" the
+# LV. For that, we need to check if the node that owns
+# it is still part of the cluster. We use the tag to
+# determine who owns the volume then query for their
+# liveness. If they are dead, we can steal.
+#
+# Operates on $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name.
+#
+# Returns:
+# $OCF_SUCCESS == the LV was [de]activated (possibly on the second
+# attempt, after vgreduce --removemissing)
+# $OCF_ERR_GENERIC == the node name is unknown, a live node owns the LV,
+# the tag could not be stolen, or the operation failed
+lv_activate()
+{
+ declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
+ declare owner=`lvs -o tags --noheadings $lv_path`
+ declare my_name=$(local_node_name)
+
+ if [ -z $my_name ]; then
+ ocf_log err "Unable to determine cluster node name"
+ return $OCF_ERR_GENERIC
+ fi
+
+ #
+ # FIXME: This code block is repeated below... might be
+ # nice to put it in a function
+ #
+ # Someone else's tag is on the LV: refuse if that node is still a
+ # cluster member, otherwise remove its tag
+ if [ ! -z $owner ] && [ $owner != $my_name ]; then
+ if is_node_member_clustat $owner ; then
+ ocf_log err "$owner owns $lv_path unable to $1"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log notice "Owner of $lv_path is not in the cluster"
+ ocf_log notice "Stealing $lv_path"
+
+ lvchange --deltag $owner $lv_path
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to steal $lv_path from $owner"
+ return $OCF_ERR_GENERIC
+ fi
+
+ # Warning --deltag doesn't always result in failure
+ if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then
+ ocf_log err "Failed to steal $lv_path from $owner."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ if ! lv_activate_and_tag $1 $my_name $lv_path; then
+ ocf_log err "Failed to $1 $lv_path"
+
+ # Only a failed "start" is retried; the retry is preceded by an
+ # attempt to make the volume group consistent
+ if [ "$1" == "start" ]; then
+ ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
+
+ if vgreduce --removemissing --config \
+ "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
+ $OCF_RESKEY_vg_name; then
+ ocf_log notice "$OCF_RESKEY_vg_name now consistent"
+ # Re-read the owner and repeat the ownership check from above
+ owner=`lvs -o tags --noheadings $lv_path`
+ if [ ! -z $owner ] && [ $owner != $my_name ]; then
+ if is_node_member_clustat $owner ; then
+ ocf_log err "$owner owns $lv_path unable to $1"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log notice "Owner of $lv_path is not in the cluster"
+ ocf_log notice "Stealing $lv_path"
+
+ lvchange --deltag $owner $lv_path
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to steal $lv_path from $owner"
+ return $OCF_ERR_GENERIC
+ fi
+
+ # Warning --deltag doesn't always result in failure
+ if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then
+ ocf_log err "Failed to steal $lv_path from $owner."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ if ! lv_activate_and_tag $1 $my_name $lv_path; then
+ ocf_log err "Failed second attempt to $1 $lv_path"
+ return $OCF_ERR_GENERIC
+ else
+ ocf_log notice "Second attempt to $1 $lv_path successful"
+ return $OCF_SUCCESS
+ fi
+ else
+ ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ # Not a "start": no retry (the failure is logged a second time)
+ ocf_log err "Failed to $1 $lv_path"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ return $OCF_SUCCESS
+}
+
+function lv_start
+{
+ if ! lv_activate start; then
+ return 1
+ fi
+
+ return 0
+}
+
+function lv_stop
+{
+ if ! lv_activate stop; then
+ return 1
+ fi
+
+ return 0
+}
--- cluster/rgmanager/src/resources/lvm_by_vg.sh 2008/01/03 20:35:39 1.1
+++ cluster/rgmanager/src/resources/lvm_by_vg.sh 2008/01/03 21:02:53 1.2
@@ -0,0 +1,266 @@
+#!/bin/bash
+
+#
+# Copyright Red Hat Inc., 2007
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+# MA 02139, USA.
+#
+
+# vg_owner
+#
+# Returns:
+# 1 == We are the owner
+# 2 == We can claim it
+# 0 == Owned by someone else
+function vg_owner
+{
+ local owner=`vgs -o tags --noheadings $OCF_RESKEY_vg_name`
+ local my_name=$(local_node_name)
+
+ if [ -z $my_name ]; then
+ ocf_log err "Unable to determine cluster node name"
+ return 0
+ fi
+
+ if [ -z $owner ]; then
+ # No-one owns this VG yet, so we can claim it
+ return 2
+ fi
+
+ if [ $owner != $my_name ]; then
+ if is_node_member_clustat $owner ; then
+ return 0
+ fi
+ return 2
+ fi
+
+ return 1
+}
+
+# strip_tags
+#
+# Remove every tag from the volume group named by $OCF_RESKEY_vg_name.
+#
+# Returns:
+#   $OCF_SUCCESS     == no tags are left on the volume group
+#   $OCF_ERR_GENERIC == at least one tag is still present afterwards
+function strip_tags
+{
+	local i
+	local remaining
+
+	# vgs prints the tags as one comma-separated field; turn the commas
+	# into blanks so that word splitting yields one tag per iteration.
+	for i in $(vgs --noheadings -o tags "$OCF_RESKEY_vg_name" | sed 's/,/ /g'); do
+		ocf_log info "Stripping tag, $i"
+		# Name the VG explicitly so that the tag removal is confined
+		# to this volume group.
+		vgchange --deltag "$i" "$OCF_RESKEY_vg_name"
+	done
+
+	# The exit status of vgchange is not relied upon; re-read the tags
+	# and ignore the whitespace vgs pads its output with.
+	remaining=$(vgs -o tags --noheadings "$OCF_RESKEY_vg_name")
+	if [ -n "${remaining//[[:space:]]/}" ]; then
+		ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name"
+		return $OCF_ERR_GENERIC
+	fi
+
+	return $OCF_SUCCESS
+}
+
+# strip_and_add_tag
+#
+# Replace whatever tags are on $OCF_RESKEY_vg_name with a single tag
+# holding the local cluster node name.
+#
+# Returns:
+#   $OCF_SUCCESS     == old tags removed and the new tag added
+#   $OCF_ERR_GENERIC == removing the old tags or adding the new one failed
+function strip_and_add_tag
+{
+	strip_tags || {
+		ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name"
+		return $OCF_ERR_GENERIC
+	}
+
+	if vgchange --addtag $(local_node_name) $OCF_RESKEY_vg_name; then
+		ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name"
+		return $OCF_SUCCESS
+	fi
+
+	ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name"
+	return $OCF_ERR_GENERIC
+}
+
+# vg_status
+#
+# Are all the LVs active?
+#
+# Checks, in order, that every LV in $OCF_RESKEY_vg_name is active, that
+# each LV has a /dev/<vg>/<lv> symlink resolving to a block device, and
+# that vg_owner reports this node as the owner.  When the ownership
+# check fails, deactivation of the volume group is attempted.
+#
+# Returns:
+#   $OCF_SUCCESS     == all checks passed
+#   $OCF_ERR_GENERIC == any check failed
+function vg_status
+{
+	local i
+	local dev
+	local realdev
+	local my_name
+
+	#
+	# Check that all LVs are active
+	#
+	for i in $(lvs "$OCF_RESKEY_vg_name" --noheadings -o attr); do
+		if [[ ! $i =~ ....a. ]]; then
+			return $OCF_ERR_GENERIC
+		fi
+	done
+
+	#
+	# Check if all links/device nodes are present
+	#
+	for i in $(lvs "$OCF_RESKEY_vg_name" --noheadings -o name); do
+		dev="/dev/$OCF_RESKEY_vg_name/$i"
+
+		if [ ! -h "$dev" ]; then
+			ocf_log err "Symbolic link for $dev is not present"
+			return $OCF_ERR_GENERIC
+		fi
+
+		if ! realdev=$(readlink -f "$dev"); then
+			ocf_log err "Failed to follow link, $dev"
+			return $OCF_ERR_GENERIC
+		fi
+
+		if [ ! -b "$realdev" ]; then
+			ocf_log err "Device node for $dev is not present"
+			return $OCF_ERR_GENERIC
+		fi
+	done
+
+	#
+	# Verify that we are the correct owner
+	# (vg_owner returns 1 only when this node owns the VG)
+	#
+	vg_owner
+	if [ $? -ne 1 ]; then
+		# vg_owner keeps the node name local to itself, so look it
+		# up here for the log message.
+		my_name=$(local_node_name)
+
+		ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active"
+		ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name"
+		ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name"
+
+		# FIXME: may need more force to shut this down
+		vgchange -an "$OCF_RESKEY_vg_name"
+		return $OCF_ERR_GENERIC
+	fi
+
+	return $OCF_SUCCESS
+}
+
+function vg_verify
+{
+ # Anything to verify?
+ return $OCF_SUCCESS
+}
+
+function vg_start
+{
+ local a
+ local results
+ local all_pvs
+ local resilience
+
+ ocf_log info "Starting volume group, $OCF_RESKEY_vg_name"
+
+ vg_owner
+ case $? in
+ 0)
+ ocf_log info "Someone else owns this volume group"
+ return $OCF_ERR_GENERIC
+ ;;
+ 1)
+ ocf_log info "I own this volume group"
+ ;;
+ 2)
+ ocf_log info "I can claim this volume group"
+ ;;
+ esac
+
+ if ! strip_and_add_tag ||
+ ! vgchange -ay $OCF_RESKEY_vg_name -vvvv >& /tmp/butt; then
+ ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
+ ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
+
+ if ! vgreduce --removemissing --config \
+ "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
+ $OCF_RESKEY_vg_name; then
+
+ ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
+ return $OCF_ERR_GENERIC
+ fi
+
+ vg_owner
+ if [ $? -eq 0 ]; then
+ ocf_log err "Unable to claim ownership of $OCF_RESKEY_vg_name"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if ! strip_and_add_tag ||
+ ! vgchange -ay $OCF_RESKEY_vg_name; then
+ ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name"
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful"
+ return $OCF_SUCCESS
+ else
+ # The activation commands succeeded, but did they do anything?
+ # Make sure all the logical volumes are active
+ results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`)
+ a=0
+ while [ ! -z ${results[$a]} ]; do
+ if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
+ all_pvs=(`pvs --noheadings -o name 2> /dev/null`)
+ resilience=" --config devices{filter=["
+ for i in ${all_pvs[*]}; do
+ resilience=$resilience'"a|'$i'|",'
+ done
+ resilience=$resilience"\"r|.*|\"]}"
+
+ vgchange -ay $OCF_RESKEY_vg_name $resilience
+ break
+ fi
+ a=$(($a + 2))
+ done
+
+ # We need to check the LVs again if we made the command resilient
+ if [ ! -z $resilience ]; then
+ results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`)
+ a=0
+ while [ ! -z ${results[$a]} ]; do
+ if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
+ ocf_log err "Failed to activate $OCF_RESKEY_vg_name"
+ return $OCF_ERR_GENERIC
+ fi
+ a=$(($a + 2))
+ done
+ ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations"
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# vg_stop
+#
+# Deactivate $OCF_RESKEY_vg_name, confirm that none of its logical
+# volumes is still active, and strip the tags unless vg_owner returns 0.
+#
+# Returns:
+#   $OCF_SUCCESS     == no logical volume is active any more
+#   $OCF_ERR_GENERIC == a logical volume is still active
+function vg_stop
+{
+	local idx
+	local lv_info
+
+	# Shut down the volume group
+	# Do we need to make this resilient?
+	vgchange -an $OCF_RESKEY_vg_name
+
+	# lv_info holds alternating <name> <attr> words; walk it pairwise
+	# and fail on the first LV that is still active
+	lv_info=($(lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name))
+	for ((idx = 0; idx < ${#lv_info[@]}; idx += 2)); do
+		if [[ ${lv_info[idx + 1]} =~ ....a. ]]; then
+			ocf_log err "Logical volume $OCF_RESKEY_vg_name/${lv_info[idx]} failed to shutdown"
+			return $OCF_ERR_GENERIC
+		fi
+	done
+
+	# Make sure we are the owner before we strip the tags
+	# (vg_owner returns 0 when someone else owns the VG)
+	vg_owner || strip_tags
+
+	return $OCF_SUCCESS
+}
--- cluster/rgmanager/src/resources/Makefile 2007/12/22 13:36:59 1.24
+++ cluster/rgmanager/src/resources/Makefile 2008/01/03 21:02:53 1.25
@@ -17,12 +17,12 @@
RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
script.sh netfs.sh clusterfs.sh smb.sh \
apache.sh openldap.sh samba.sh mysql.sh \
- postgres-8.sh tomcat-5.sh lvm.sh vm.sh \
- SAPInstance SAPDatabase named.sh
+ postgres-8.sh tomcat-5.sh lvm.sh lvm_by_lv.sh lvm_by_vg.sh \
+ vm.sh SAPInstance SAPDatabase named.sh
METADATA=apache.metadata openldap.metadata samba.metadata \
mysql.metadata postgres-8.metadata tomcat-5.metadata \
- named.metadata
+ named.metadata lvm.metadata
TARGETS=ocf-shellfuncs svclib_nfslock default_event_script.sl
--- cluster/rgmanager/src/resources/lvm.sh 2008/01/03 20:56:49 1.11
+++ cluster/rgmanager/src/resources/lvm.sh 2008/01/03 21:02:53 1.12
@@ -146,7 +146,7 @@
rv=0
;;
-verify-all)
+validate-all|verify-all)
##
# We can safely ignore clustered volume groups (VGs handled by CLVM)
##
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-01-03 21:02 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-03 20:45 [Cluster-devel] cluster/rgmanager/src/resources Makefile lvm.s jbrassow
-- strict thread matches above, loose matches on Subject: below --
2008-01-03 21:02 jbrassow
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.