From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ryan O'Hara Date: Thu, 6 Aug 2009 13:27:35 -0500 Subject: [Cluster-devel] fence_scsi support for 2 node clusters Message-ID: <20090806182735.GC5143@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit I've attached a proposed patch to fence_scsi that will allow it to work with 2 node clusters. This refers to BZ 516085. Below is a description of the problem and the solution. Ryan -- First, it is absolutely required that a node (IT nexus, actually) be registered with a device in order to unregister a key from that device. This detail is key to the solution. In the current version of fence_scsi, there are two problems that prevent fence_scsi from working on 2 node clusters. 1. If a node is asked to fence (remove a key) for a device that it is not registered with, the node performing the fence operation will attempt to register with that device on-the-fly (at fence time). This was done in order to prevent fencing failures. It is true that this case should never happen. 2. For SAN environments with multiple LUNs (the common case), it is absolutely crucial that the list of LUNs (devices) from which we need to unregister the key must be ordered consistently on all nodes. This is not guaranteed by lvm since device names may vary from one node to the next. The reason this is needed is to prevent interleaving of fence operations (sg_persist, unregister key) among devices. Since 2 node fencing is a race, when the two nodes attempt to fence one another it might be possible for each node to fence the other from a subset of the devices. We want to avoid this. The solution for problem #1 is to remove the bit of code that registers with a device at fence time (if needed to continue with fencing). Instead, if a node is asked to fence (remove a key) from a device for which it is not registered, fencing will fail. 
The solution for problem #2 is to sort the list of devices extracted within the fence_scsi agent using a vgs command. Sorting alphabetically by device name is not sufficient, so instead the agent will extract the device name (pv_name) and uuid (pv_uuid) and build a hash which is keyed on the uuid. The uuid will be consistent on all nodes, and we will sort by uuid. This will ensure that devices are ordered identically on each node. With these two changes in place, fence_scsi should work in a 2 node cluster. At fence time, a race will occur -- the first node to successfully fence the other will win. The first node to fence will remove the other node's key from the device(s). The second node will not be able to fence the first because it is no longer registered with the device, and it will fail. -------------- next part -------------- diff --git a/fence/agents/scsi/fence_scsi.pl b/fence/agents/scsi/fence_scsi.pl index 62454c7..ec7e0e3 100755 --- a/fence/agents/scsi/fence_scsi.pl +++ b/fence/agents/scsi/fence_scsi.pl @@ -5,7 +5,7 @@ use XML::LibXML; use IPC::Open3; use POSIX; -my @device_list; +my %device_list; $_ = $0; s/.*\///; @@ -78,7 +78,7 @@ sub get_cluster_id my ($name, $value) = split(/\s*:\s*/, $_); - if ($name eq "Cluster Id") + if (uc($name) eq "CLUSTER ID") { $cluster_id = $value; last; @@ -89,30 +89,30 @@ sub get_cluster_id close($out); close($err); - print "[$pname]: get_cluster_id: cluster_id=$cluster_id\n" if $opt_v; + print "[$pname]: get_cluster_id = $cluster_id\n" if $opt_v; return $cluster_id; } sub get_node_id { - ($node)=@_; + ($name)=@_; my $xml = XML::LibXML->new(); my $tree = $xml->parse_file("/etc/cluster/cluster.conf"); - my $xpath = "//cluster/clusternodes/clusternode[\@name='$node']/\@nodeid"; + my $xpath = "//cluster/clusternodes/clusternode[\@name='$name']/\@nodeid"; my $node_id = $tree->findvalue($xpath); - print "[$pname]: get_node_id ($node): node_id=$node_id\n" if $opt_v; + print "[$pname]: get_node_id ($name) = $node_id\n" if $opt_v; 
return $node_id; } sub get_node_name { - print "[$pname]: get_hode_name: node_name=$opt_n\n" if $opt_v; + print "[$pname]: get_hode_name = $opt_n\n" if $opt_v; return $opt_n; } @@ -136,7 +136,7 @@ sub get_host_id my ($name, $value) = split(/\s*:\s*/, $_); - if ($name eq "Node ID") + if (uc($name) eq "NODE ID") { $host_id = $value; last; @@ -147,7 +147,7 @@ sub get_host_id close($out); close($err); - print "[$pname]: get_host_id: host_id=$host_id\n" if $opt_v; + print "[$pname]: get_host_id = $host_id\n" if $opt_v; return $host_id; } @@ -171,7 +171,7 @@ sub get_host_name my ($name, $value) = split(/\s*:\s*/, $_); - if ($name eq "Node name") + if (uc($name) eq "NODE NAME") { $host_name = $value; last; @@ -182,25 +182,25 @@ sub get_host_name close($out); close($err); - print "[$pname]: get_host_name: host_name=$host_name\n" if $opt_v; + print "[$pname]: get_host_nam = $host_name\n" if $opt_v; return $host_name; } sub get_key { - ($node)=@_; + ($name)=@_; - my $cluster_id = get_cluster_id; - my $node_id = get_node_id($node); + my $cluster_id = get_cluster_id(); + my $node_id = get_node_id($name); if ($node_id == 0) { - die "Unable to determine nodeid for $node.\n"; + die "Unable to determine nodeid for $name.\n"; } my $key = sprintf "%x%.4x", $cluster_id, $node_id; - print "[$pname]: get_key ($node): key=$key\n" if $opt_v; + print "[$pname]: get_key ($name) = $key\n" if $opt_v; return $key; } @@ -258,11 +258,11 @@ sub get_options_stdin sub get_key_list { - ($dev) = @_; + ($device) = @_; my ($in, $out, $err); - my $cmd = "sg_persist -d $dev -i -k"; + my $cmd = "sg_persist -d $device -i -k"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); @@ -274,12 +274,13 @@ sub get_key_list while (<$out>) { chomp; + if ($_ =~ /^\s*0x/) { s/^\s+0x//; s/\s+$//; - $key_list{$_} = 1; + $key_list{$_} = undef; } } @@ -290,7 +291,7 @@ sub get_key_list my $count = keys %key_list; my $index = 0; - print "[$pname]: get_key_list: found $count keys registered with $dev\n"; 
+ print "[$pname]: get_key_list: found $count keys registered with $device\n"; for $key (keys %key_list) { @@ -311,7 +312,7 @@ sub get_scsi_devices my ($in, $out, $err); my $cmd = "vgs --config 'global { locking_type = 0 }'" . - " --noheadings --separator : -o vg_attr,pv_name 2> /dev/null"; + " --noheadings --separator : -o vg_attr,pv_name,pv_uuid 2> /dev/null"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; @@ -323,12 +324,11 @@ sub get_scsi_devices { chomp; - my ($vg_attrs, $dev) = split(/:/, $_); + my ($vg_attrs, $pv_name, $pv_uuid) = split(/:/, $_); if ($vg_attrs =~ /.*c$/) { - $dev =~ s/\(.*\)//; - push(@device_list, $dev); + $device_list{"\U$pv_uuid"} = $pv_name; } } @@ -336,14 +336,14 @@ sub get_scsi_devices # if ($opt_v) { - my $count = scalar @device_list; + my $count = keys %device_list; my $index = 0; print "[$pname]: get_scsi_devices: found $count devices\n"; - for $dev (@device_list) + for $uuid (sort keys %device_list) { - print "[$pname]: ($index) dev=$dev\n"; + print "[$pname]: ($index) device=$device_list{$uuid} UUID=$uuid\n"; $index++; } } @@ -357,28 +357,12 @@ sub check_sg_persist { my ($in, $out, $err); my $cmd = "sg_persist -V"; - my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; - - waitpid($pid, 0); - - die "Unable to execute sg_persist.\n" if ($?>>8); - - close($in); - close($out); - close($err); -} -sub do_register -{ - ($dev, $key) = @_; - - my ($in, $out, $err); - my $cmd = "sg_persist -n -d $dev -o -G -S $key"; my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; waitpid($pid, 0); - die "Unable to execute sg_persist ($dev).\n" if ($?>>8); + die "Unable to execute sg_persist.\n" if ($?>>8); close($in); close($out); @@ -395,38 +379,28 @@ sub fence_node my ($in, $out, $err); - foreach $dev (@device_list) + for $uuid (sort keys %device_list) { - my %key_list = get_key_list($dev); - - # DEBUG: use -v option - # - if ($opt_v) - { - print "[$pname]: unregister key 0x$node_key from device $dev\n"; - } + my $device = 
$device_list{$uuid}; + my %key_list = get_key_list($device); - if (!$key_list{$host_key}) + if (! exists $key_list{$host_key}) { - do_register($dev, $host_key); + fail "Unable to perform fence operation."; } - if (!$key_list{$node_key}) + if (! exists $key_list{$node_key}) { - if ($opt_v) - { - print "[$pname]: key 0x$node_key is not registered with device $dev\n"; - } next; } if ($host_key eq $node_key) { - $cmd = "sg_persist -n -d $dev -o -G -K $host_key -S 0"; + $cmd = "sg_persist -n -d $device -o -G -K $host_key -S 0"; } else { - $cmd = "sg_persist -n -d $dev -o -A -K $host_key -S $node_key -T 5"; + $cmd = "sg_persist -n -d $device -o -A -K $host_key -S $node_key -T 5"; } my $pid = open3($in, $out, $err, $cmd) or die "$!\n"; @@ -462,8 +436,8 @@ if (@ARGV > 0) { } -check_sg_persist; +check_sg_persist(); -get_scsi_devices; +get_scsi_devices(); -fence_node; +fence_node();