From mboxrd@z Thu Jan 1 00:00:00 1970 From: rohara@sourceware.org Date: 18 Jan 2008 23:02:58 -0000 Subject: [Cluster-devel] cluster/fence/agents/scsi fence_scsi.pl fence_ ... Message-ID: <20080118230258.27608.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL5 Changes by: rohara at sourceware.org 2008-01-18 23:02:58 Modified files: fence/agents/scsi: fence_scsi.pl fence_scsi_test.pl scsi_reserve Log message: BZ: 373491, 373511, 373531, 373541, 373571, 429033 BZ: 373491, 373511, 373531, 373541, 373571, 429033 - Prevent "reservation conflict" messages when scsi_reserve starts. - Leave the fence domain if scsi_reserve fails to register with any device. - Improve logging in scsi_reserve script. - Use "locking_type = 0" for all lvm commands (i.e. vgs). - Fix SCSI reservations scripts to handle LVM mirrors and stripes. - Not an error if fence_scsi attempts to remove a non-existent key from a device. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/scsi/fence_scsi.pl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.5.2.6&r2=1.5.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/scsi/fence_scsi_test.pl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/scsi/scsi_reserve.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.8&r2=1.1.2.9 --- cluster/fence/agents/scsi/fence_scsi.pl 2007/11/08 18:48:08 1.5.2.6 +++ cluster/fence/agents/scsi/fence_scsi.pl 2008/01/18 23:02:58 1.5.2.7 @@ -11,9 +11,6 @@ s/.*\///; my $pname = $_; -# WARNING!! 
Do not add code bewteen "#BEGIN_VERSION_GENERATION" and -# "#END_VERSION_GENERATION" It is generated by the Makefile - #BEGIN_VERSION_GENERATION $FENCE_RELEASE_NAME=""; $REDHAT_COPYRIGHT=""; @@ -195,7 +192,6 @@ return $key; } - sub get_options_stdin { my $opt; @@ -252,7 +248,8 @@ ($dev) = @_; my ($in, $out, $err); - my $cmd = "sg_persist -d $dev -i -k"; + + my $cmd = "sg_persist -n -d $dev -i -k"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); @@ -278,46 +275,36 @@ return %key_list; } -#sub get_scsi_devices -#{ -# my ($in, $out, $err); -# my $cmd = "lvs --noheadings --separator : -o vg_attr,devices"; -# my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; -# -# waitpid($pid, 0); -# -# die "Unable to execute lvs.\n" if ($?>>8); -# -# while (<$out>) -# { -# chomp; -# print "OUT: $_\n" if $opt_v; -# -# my ($vg_attrs, $device) = split(/:/, $_); -# -# if ($vg_attrs =~ /.*c$/) -# { -# $device =~ s/\(.*\)//; -# push(@volumes, $device); -# } -# } -# -# close($in); -# close($out); -# close($err); -#} - sub get_scsi_devices { - open(FILE, "/var/run/scsi_reserve") or die "$!\n"; + my ($in, $out, $err); + + my $cmd = "vgs --config 'global { locking_type = 0 }'" . 
+ " --noheadings --separator : -o vg_attr,pv_name"; - while () + my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; + + waitpid($pid, 0); + + die "Unable to execute lvs.\n" if ($?>>8); + + while (<$out>) { chomp; - push(@volumes, $_); + print "OUT: $_\n" if $opt_v; + + my ($vg_attrs, $device) = split(/:/, $_); + + if ($vg_attrs =~ /.*c$/) + { + $device =~ s/\(.*\)//; + push(@volumes, $device); + } } - close FILE; + close($in); + close($out); + close($err); } sub check_sg_persist @@ -346,7 +333,7 @@ ($dev, $key) = @_; my ($in, $out, $err); - my $cmd = "sg_persist -d $dev -o -G -S $key"; + my $cmd = "sg_persist -n -d $dev -o -G -S $key"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); @@ -383,13 +370,18 @@ do_register($dev, $host_key); } + if (!$key_list{$node_key}) + { + next; + } + if ($host_key eq $node_key) { - $cmd = "sg_persist -d $dev -o -G -K $host_key -S 0"; + $cmd = "sg_persist -n -d $dev -o -G -K $host_key -S 0"; } else { - $cmd = "sg_persist -d $dev -o -A -K $host_key -S $node_key -T 5"; + $cmd = "sg_persist -n -d $dev -o -A -K $host_key -S $node_key -T 5"; } my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; @@ -419,7 +411,7 @@ usage if defined $opt_h; version if defined $opt_V; - fail_usage "Unkown parameter." if (@ARGV > 0); + fail_usage "Unknown parameter." if (@ARGV > 0); fail_usage "No '-n' flag specified." unless defined $opt_n; --- cluster/fence/agents/scsi/fence_scsi_test.pl 2007/03/06 19:10:34 1.1.2.2 +++ cluster/fence/agents/scsi/fence_scsi_test.pl 2008/01/18 23:02:58 1.1.2.3 @@ -1,123 +1,96 @@ #!/usr/bin/perl +use POSIX; use IPC::Open3; -use Sys::Hostname; +use XML::LibXML; use Getopt::Std; -use POSIX; my @devices; my %results; -# WARNING!! 
Do not add code bewteen "#BEGIN_VERSION_GENERATION" and -# "#END_VERSION_GENERATION" It is generated by the Makefile - #BEGIN_VERSION_GENERATION $FENCE_RELEASE_NAME=""; $REDHAT_COPYRIGHT=""; $BUILD_DATE=""; #END_VERSION_GENERATION -sub get_key +sub get_scsi_block_devices { - my $name = @_; - my $addr = gethostbyname($name) or die "$!\n"; + my $block_dir = "/sys/block"; - return unpack("H*", $addr); -} + opendir(DIR, $block_dir) or die "$!\n"; -sub register_device -{ - my $func = (caller(0))[3]; - my ($dev, $key) = @_; + my @block_devices = grep { /^sd*/ } readdir(DIR); - print "DEBUG: $func ($dev, $key)\n" if ($opt_d); + closedir(DIR); + for $block_dev (@block_devices) + { + push(@devices, "/dev/" . $block_dev); + } +} + +sub get_cluster_vol_devices +{ my ($in, $out, $err); - my $cmd = "sg_persist -d $dev -o -G -S $key"; + + my $cmd = "vgs --config 'global { locking_type = 0 }'" . + " --noheadings --separator : -o vg_attr,pv_name"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); - my $rval = WEXITSTATUS($?); + die "[error] unable to execute vgs command.\n" if WEXITSTATUS($?); - $results{$dev}[0] = $rval; + while (<$out>) + { + chomp; - print "DEBUG: [$rval] $cmd\n" if ($opt_d); + my ($vg_attr, $pv_name) = split(/:/, $_); + + if ($vg_attr =~ /.*c$/) + { + ###### DEBUG ###### + print "DEBUG: pv_name = $pv_name\n"; + + push(@devices, $pv_name); + } + } close($in); close($out); close($err); - - return $rval; } -sub unregister_device +sub register_device { - my $func = (caller(0))[3]; my ($dev, $key) = @_; - - print "DEBUG: $func ($dev, $key)\n" if ($opt_d); - my ($in, $out, $err); - my $cmd = "sg_persist -d $dev -o -G -K $key -S 0"; + my $cmd = "sg_persist -n -d $dev -o -G -S $key"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); - my $rval = WEXITSTATUS($?); - - $results{$dev}[1] = $rval; - - print "DEBUG: [$rval] $cmd\n" if ($opt_d); + $results{$dev}[0] = WEXITSTATUS($?); close($in); close($out); close($err); - - return 
$rval; -} - -sub get_block_devices -{ - my $block_dir = "/sys/block"; - - opendir(DIR, $block_dir) or die "Error: $! $block_dir\n"; - - my @block_devices = grep { /^sd*/ } readdir(DIR); - - closedir(DIR); - - for $dev (@block_devices) - { - push @devices, "/dev/" . $dev; - } } -sub get_cluster_devices +sub unregister_device { + my ($dev, $key) = @_; my ($in, $out, $err); - my $cmd = "lvs --noheadings --separator : -o vg_attr,devices"; + my $cmd = "sg_persist -n -d $dev -o -G -K $key -S 0"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); - die "Error: unable to exec lvs command.\n" if WEXITSTATUS($?); - - while (<$out>) - { - chomp; - - my ($vg_attr, $dev) = split(/:/, $_); - - if ($vg_attr =~ /.*c$/) - { - $dev =~ s/\(.*\)//; - push @devices, $dev; - } - } + $results{$dev}[1] = WEXITSTATUS($?); close($in); close($out); @@ -126,20 +99,38 @@ sub test_devices { - my $name = hostname() or die "$!\n"; - my $key = get_key($name); + my $key = "0xDEADBEEF"; foreach $dev (@devices) { - if (register_device($dev, $key) != 0) - { - } - if (unregister_device($dev, $key) != 0) - { - } + register_device($dev, $key); + unregister_device($dev, $key); } } +sub check_config_fence +{ + my $xml = XML::LibXML->new(); + my $tree = $xml->parse_file("/etc/cluster/cluster.conf"); + my $root = "//cluster/fencedevices/fencedevice"; + + my $xpath_fence = "count(${root}[\@agent='fence_scsi'])"; + + return ( ! $tree->findvalue($xpath_fence)); +} + +sub check_config_nodes +{ + my $xml = XML::LibXML->new(); + my $tree = $xml->parse_file("/etc/cluster/cluster.conf"); + my $root = "//cluster/clusternodes/clusternode"; + + my $xpath_name = "count(${root}/\@name)"; + my $xpath_nodeid = "count(${root}/\@nodeid)"; + + return ($tree->findvalue($xpath_name) != $tree->findvalue($xpath_nodeid)); +} + sub print_results { my $device_count = scalar(@devices); @@ -196,9 +187,9 @@ print " -h Help. 
Prints out this usage information.\n\n"; } -### MAIN ####################################################### +### MAIN ####################################################################### -if (getopts("cdhsv") == 0) +if (getopts("cdhst:v") == 0) { print_usage; exit(1); @@ -213,25 +204,34 @@ if ($opt_c) { print "\nTesting devices in cluster volumes...\n"; - get_cluster_devices; + get_cluster_vol_devices; + test_devices; + print_results; } if ($opt_s) { print "\nTesting all SCSI block devices...\n"; - get_block_devices; + get_scsi_block_devices; + test_devices; + print_results; +} + +if ($opt_t) +{ + if ($opt_t eq "fence") + { + exit check_config_fence; + } + if ($opt_t eq "nodes") + { + exit check_config_nodes; + } } -if (!$opt_c && !$opt_s) +if (!$opt_c && !$opt_s && !$opt_t) { print "\nPlease specify either cluster or SCSI mode.\n"; print_usage; exit(1); } - -test_devices; - -print_results; - -exit 0; - --- cluster/fence/agents/scsi/scsi_reserve 2007/11/08 18:48:08 1.1.2.8 +++ cluster/fence/agents/scsi/scsi_reserve 2008/01/18 23:02:58 1.1.2.9 @@ -1,166 +1,248 @@ #!/bin/bash # +# scsi_reserve: +# # chkconfig: 345 25 75 -# description: start/stop persistent reservation service for lvm +# description: +# config: /etc/sysconfig/scsi_reserve . /etc/init.d/functions +# read in config file if it exists +# +if [ -f /etc/sysconfig/scsi_reserve ] ; then + . /etc/sysconfig/scsi_reserve +fi + +# check if cluster is configured for fence_scsi +# +if ! fence_scsi_test -t fence ; then + logger -t scsi_reserve \ + "[error] cluster not configured for scsi reservations" + exit 1 +fi + +# check for nodeids in config file +# +if ! fence_scsi_test -t nodes ; then + logger -t scsi_reserve \ + "[error] cluster must define nodeid for all nodes" + exit 1 +fi + # check for sg_persist command provided by sg3_utils package # if ! 
sg_persist -V &> /dev/null ; then - echo "error: sg_persist not found" - exit 2 + logger -t scsi_reserve \ + "[error] unable to exec sg_persist" + exit 1 fi -# get scsi devices that are part of clustered volumes +# check that cman is running # -scsi_devices=$( lvs -o vg_attr,devices --noheadings \ - | awk --posix ' $1 ~ /[-a-z]{5}c/ { print $2 } ' \ - | sed -e 's/([0-9]*)//' | sort | uniq ) +if ! cman_tool status &> /dev/null ; then + logger -t scsi_reserve \ + "[error] cman does not appear to be running" + exit 1 +fi -# if no scsi devices were found we can exit now +# get physical volumes (devices) that are part of cluster volumes # -[ -z "$scsi_devices" ] && exit 0 +scsi_devices=$( vgs --config 'global { locking_type = 0 }' \ + --noheadings -o vg_attr,pv_name 2> /dev/null \ + | awk ' $1 ~ /.*c$/ { print $2 } ' ) + +if [ -z "$scsi_devices" ] ; then + logger -t scsi_reserve \ + "[error] did not find devices in cluster volumes" + exit 1 +fi -# get the node name and node addr from cman +# get the cluster id from cman # -node_name=$( cman_tool status | grep "Node name" | awk -F": " '{ print $2 }' ) -node_addr=$( cman_tool status | grep "Node addr" | awk -F": " '{ print $2 }' ) +cluster_id=$( cman_tool status | grep -i "Cluster ID" \ + | awk -F": " '{ print $2 }' ) + +if [ -z "$cluster_id" ] ; then + logger -s -t scsi_reserve \ + "[error] unable to determine cluster id" + exit 1 +fi -# get cluster id and node id from cman +# get the node id from cman # -c_id=$( cman_tool status | grep -i "Cluster ID" | awk -F": " '{ print $2 }' ) -n_id=$( cman_tool status | grep -i "Node ID" | awk -F": " '{ print $2 }' ) +node_id=$( cman_tool status | grep -i "Node ID" \ + | awk -F": " '{ print $2 }' ) -[ -z "$c_id" ] && exit 1 -[ -z "$n_id" ] && exit 1 +if [ -z "$node_id" ] ; then + logger -t scsi_reserve \ + "[error] unable to determine node id" + exit 1 +fi -# create unique key for this host +# generate unique key using cluster_id and node_id # -key=$( printf "%x%.4x" $c_id $n_id ) 
+key=$( printf "%x%.4x" $cluster_id $node_id ) + +if [ -z "$key" ] ; then + logger -t scsi_reserve \ + "[error] unable to generate key" + exit 1 +fi -############################################################################### +################################################################################ case $1 in -start) + start) -rval=0 + error=0 + count=0 -echo "$key" > /var/lock/subsys/scsi_reserve + echo -n "Starting scsi_reserve:" -cat /dev/null > /var/run/scsi_reserve + for dev in $scsi_devices + do + # check if our key is already resgistered with this device + # + if sg_persist -n -d $dev -i -k | grep -qiE "^[[:space:]]*0x$key" ; then + logger -t scsi_reserve \ + "[info] already registered with $dev (key=0x$key)" + continue + fi + + # create the scsi registration + # + if ! sg_persist -n -d $dev -o -I -S $key &> /dev/null ; then + logger -t scsi_reserve \ + "[error] unable to register device $dev (key=0x$key)" + : $[ count = $count + 1 ] + error=1 + else + logger -t scsi_reserve \ + "[info] registered with device $dev (key=0x$key)" + fi + + # check to see if reservation already exists + # + if sg_persist -n -d $dev -i -r | grep -qiE "^[[:space:]]*Key=0x" ; then + logger -t scsi_reserve \ + "[info] reservation already exists on $dev" + continue + fi + + # create the scsi reservation + # + if ! 
sg_persist -n -d $dev -o -R -K $key -T 5 &> /dev/null ; then + logger -t scsi_reserver \ + "[error] unable to create reservation on $dev (key=0x$key)" + : $[ count = $count + 1 ] + error=1 + fi + done + + # leave fence domain if any errors occured during registration + # + if [ $error -eq 0 ] ; then + success + else + logger -t scsi_reserve \ + "[info] $count errors during registration" + logger -t scsi_reserve \ + "[info] leaving the fence domain" + fence_tool leave + failure + fi -# register each device using our key -# -for dev in $scsi_devices -do - - echo -n "Registering device: $dev" - - for error in 1 - do - sg_persist -d $dev -o -G -S $key &>/dev/null || break - error=0 - done - - if [ $error -eq 0 ]; then - echo $dev >> /var/run/scsi_reserve - success - else - # perhaps we are already resgistered - # - if sg_persist -d $dev -i -k 2>/dev/null | grep -qiE "$key" ; then - echo $dev >> /var/run/scsi_reserve - success - else - failure - rval=1 - fi - fi - - echo - - # create a reservation - # - sg_persist -d $dev -o -R -K $key -T 5 &>/dev/null - -done -;; - -stop) - -rval=0 - -# unregister each device for this node -# -for dev in $scsi_devices -do - echo -n "Unregistering device: $dev" - - # get list of keys registered for this device - # - reg_keys=$( sg_persist -d $dev -i -k | grep '^[[:space:]]*0x' ) - - # check if this node/key is the node/key holding the reservation - # - if sg_persist -d $dev -i -r 2>/dev/null | grep -qiE "$key" ; then - if echo "$reg_keys" | grep -qivE "${key#0}" ; then - error=1 - else - for error in 1 - do - sg_persist -d $dev -o -G -K $key -S 0 &>/dev/null || break - error=0 - done - fi - else - for error in 1 - do - sg_persist -d $dev -o -G -K $key -S 0 &>/dev/null || break - error=0 - done - fi + echo - if [ $error -eq 0 ]; then - success "unregister device $dev" - else - failure - rval=1 - fi + ;; # end of start - echo -done + stop) -rm -f /var/lock/subsys/scsi_reserve + error=0 + count=0 -;; + echo -n "Stopping scsi_reserve:" 
-status) + for dev in $scsi_devices + do + # get list of keys registered with this device + # + key_list=$( sg_persist -n -d $dev -i -k | grep -iE "^[[:space:]]*0x" ) + + # check that our key is registered with this device + # + if ! sg_persist -d $dev -i -k | grep -qiE "^[[:space:]]*0x$key" ; then + logger -t scsi_reserve \ + "[info] not registered with $dev (key=0x$key)" + continue + fi + + # check if our key is the reservation holder + # + if sg_persist -n -d $dev -i -r 2>/dev/null | grep -qiE "$key" ; then + if echo "$key_list" | grep -qivE "$key" ; then + logger -t scsi_reserve \ + "[error] unable to remove registration on $dev (key=0x$key)" + : $[ count = $count + 1 ] + error=1 + continue + fi + fi + + # remove registration for this device + # + if ! sg_persist -n -d $dev -o -G -K $key -S 0 &> /dev/null ; then + logger -t scsi_reserve \ + "[error] failed to remove registration on $dev (key=0x$key)" + : $[ count = $count + 1 ] + error=1 + else + logger -t scsi_reserve \ + "[info] removed registration on $dev (key=0x$key)" + fi + + done + + # report success or failure + # + if [ $error -eq 0 ] ; then + success + else + logger -t scsi_reserve \ + "[info] $count errors occured during unregistration" + failure + fi -rval=0 + echo -# find devices that are registered with our key -# -for dev in $scsi_devices -do - if sg_persist -d $dev -i -k 2>/dev/null | grep -qiE "$key" ; then - devices[${#devices[@]}]=$dev - fi -done + ;; # end of stop -if [ -z "$devices" ]; then - echo "No devices resgistered." -else - echo "Found ${#devices[@]} registered device(s):" + status) - for i in "${devices[@]}" ; do - echo $i - done -fi -;; + error=0 -esac + for dev in $scsi_devices + do + if sg_persist -n -d $dev -i -k | grep -qiE "$key" ; then + devices[${#devices[@]}]=$dev + fi + done + + if [ -z "$devices" ] ; then + echo "No registered devices found." 
+ else + echo "Found ${#devices[@]} registered device(s):" + + for i in "${devices[@]}" + do + echo $i + done + fi + + ;; # end of status -exit $rval +esac +exit $error