From mboxrd@z Thu Jan  1 00:00:00 1970
From: lhh@sourceware.org <lhh@sourceware.org>
Date: 1 Dec 2006 22:14:41 -0000
Subject: [Cluster-devel] cluster/fence/agents/xvm fence_xvmd.c README
Message-ID: <20061201221441.32027.qmail@sourceware.org>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-12-01 22:14:40

Modified files:
	fence/agents/xvm: fence_xvmd.c README 

Log message:
	Handle 0.1.9 case of libvirt returning a virDomainPtr + state for a VM that doesn't exist (vm state == VIR_DOMAIN_SHUTOFF)

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/fence_xvmd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/README.diff?cvsroot=cluster&r1=1.1&r2=1.2

--- cluster/fence/agents/xvm/fence_xvmd.c	2006/11/13 16:13:50	1.5
+++ cluster/fence/agents/xvm/fence_xvmd.c	2006/12/01 22:14:40	1.6
@@ -207,6 +207,54 @@
 }
 
 
+static inline int
+wait_domain(fence_req_t *req, virConnectPtr vp, int timeout)
+{
+	int tries = 0;
+	int response = 1;
+	virDomainPtr vdp;
+	virDomainInfo di;
+
+	if (!(vdp = get_domain(req, vp)))
+		return 0;
+
+	/* Check domain liveliness.  If the domain is still here,
+	   we return failure, and the client must then retry */
+	/* XXX On the xen 3.0.4 API, we will be able to guarantee
+	   synchronous virDomainDestroy, so this check will not
+	   be necessary */
+	do {
+		sleep(1);
+		vdp = get_domain(req, vp);
+		if (!vdp) {
+			dprintf(2, "Domain no longer exists\n");
+			response = 0;
+			break;
+		}
+
+		memset(&di, 0, sizeof(di));
+		virDomainGetInfo(vdp, &di);
+		virDomainFree(vdp);
+
+		if (di.state == VIR_DOMAIN_SHUTOFF) {
+			dprintf(2, "Domain has been shut off\n");
+			response = 0;
+			break;
+		}
+		
+		dprintf(4, "Domain still exists (state %d) after %d seconds\n",
+			di.state, tries);
+
+		if (++tries >= timeout)
+			break;
+	} while (1);
+
+	return response;
+}
+
+
+
+
 int
 do_fence_request_tcp(fence_req_t *req, fence_auth_type_t auth,
 		     void *key, size_t key_len, virConnectPtr vp)
@@ -235,24 +283,18 @@
 		break;
 	case FENCE_OFF:
 		printf("Destroying domain %s...\n", (char *)req->domain);
+
+		dprintf(2, "[OFF] Calling virDomainDestroy\n");
 		ret = virDomainDestroy(vdp);
 		if (ret < 0) {
-			/* raise_error(vp); */
+			printf("virDomainDestroy() failed: %d\n", ret);
 			break;
-		} else { 
-			sleep(1);
 		}
 
-		/* Check domain liveliness.  If the domain is still here,
-		   we return failure, and the client must then retry */
-		/* XXX On the xen 3.0.4 API, we will be able to guarantee
-		   synchronous virDomainDestroy, so this check will not
-		   be necessary */
-		vdp = get_domain(req, vp);
-		if (!vdp) {
-			response = 0;	/* Success! */
-		} else {
-			virDomainFree(vdp);
+		response = wait_domain(req, vp, 15);
+
+		if (response) {
+			printf("Domain still exists; fencing failed\n");
 		}
 		break;
 	case FENCE_REBOOT:
@@ -271,42 +313,26 @@
 			       "libvirt\n");
 		}
 
-		dprintf(2, "Calling virDomainDestroy\n");
+		dprintf(2, "[REBOOT] Calling virDomainDestroy\n");
 		ret = virDomainDestroy(vdp);
 		if (ret < 0) {
 			printf("virDomainDestroy() failed: %d\n", ret);
 			if (domain_desc)
 				free(domain_desc);
 			break;
-		} else {
-			/* Give it time for the operation to complete */
-			sleep(3);
 		}
 
-		/* Check domain liveliness.  If the domain is still here,
-		   we return failure, and the client must then retry */
-		/* XXX On the xen 3.0.4 API, we will be able to guarantee
-		   synchronous virDomainDestroy, so this check will not
-		   be necessary */
-		vdp = get_domain(req, vp);
-		if (!vdp) {
-			dprintf(2, "Domain no longer exists\n");
-			response = 0;	/* Success! */
-		} else {
-			printf("Domain still exists; fencing failed\n");
-			virDomainFree(vdp);
-			ret = 1;	/* Failed to kill it */
-		}
+		response = wait_domain(req, vp, 15);
 
-		/* Recreate the domain if possible */
-		if (ret == 0 && domain_desc) {
+		if (response) {
+			printf("Domain still exists; fencing failed\n");
+		} else if (domain_desc) {
+			/* Recreate the domain if possible */
 			/* Success */
 			dprintf(2, "Calling virDomainCreateLinux()...\n");
 			virDomainCreateLinux(vp, domain_desc, 0);
-		}
-
-		if (domain_desc)
 			free(domain_desc);
+		}
 		break;
 	}
 	
@@ -646,17 +672,14 @@
 	int mc_sock;
 	char key[4096];
 	int key_len = 0;
-	char *my_options = "dfi:a:p:C:c:k:u?hVX";
+	char *my_options = "dfi:a:p:C:c:k:u?hV";
 	void *h;
 
 	args_init(&args);
 	args_get_getopt(argc, argv, my_options, &args);
-	if (!(args.flags & F_NOCCS)) {
-		args_get_ccs(my_options, &args);
-	}
 	args_finalize(&args);
 	if (args.debug > 0) {
-		dset(args.debug);
+		_debug = args.debug;
 		args_print(&args);
 	}
 
--- cluster/fence/agents/xvm/README	2006/10/05 16:11:36	1.1
+++ cluster/fence/agents/xvm/README	2006/12/01 22:14:40	1.2
@@ -1,4 +1,4 @@
-I. Fence_xvm - the Xen virtual machine fencing agent
+I. Fence_xvm - virtual machine fencing agent
 
 Fence_xvm is an agent which establishes a communications link between
 a cluster of virtual machines (VC) and a cluster of domain0/physical
@@ -20,11 +20,11 @@
 cluster!).
 
 
-II. Fence_xvmd - The Xen virtual machine fencing host
+II. Fence_xvmd - The virtual machine fencing host
 
 Fence_xvmd is a daemon which runs on physical hosts (e.g. in domain0)
-of the cluster hosting the Xen virtual cluster.  It listens on a port
-for multicast traffic from Xen virtual cluster(s), and takes actions.
+of the cluster hosting the virtual cluster.  It listens on a port
+for multicast traffic from virtual cluster(s), and takes actions.
 Multiple disjoint virtual clusters can coexist on a single physical
 host cluster, but this requires multiple instances of fence_xvmd.
 
@@ -41,11 +41,11 @@
 last- known host is down, we must store the last-known locations of
 each virtual machine in some sort of cluster-wide way.  For this, we
 use the AIS Checkpointing API, which is provided by OpenAIS.  Every
-few seconds, fence_xvmd queries the Xen Hypervisor via libvirt and
+few seconds, fence_xvmd queries the hypervisor via libvirt and
 stores any local VM states in a checkpoint.  In the event of a
 physical node failure (which consequently causes the failure of one
-or more Xen guests), we can then read the checkpoint section
-corresponding to the guest we need to fence to find out the previous
+or more guests), we can then read the checkpoint section corresponding
+to the guest we need to fence to find out the previous
 owner.  With that information, we can then check with CMAN to see if
 the last-known host node has been fenced.  If so, then the VM is
 clean as well.  The physical cluster must, therefore, have fencing
@@ -71,8 +71,10 @@
   (e) Open connection to host contained within multicast
       packet.
   (f) Check with CMAN to see if last-known host has been fenced.
-  (g) If last-known host has been fenced, send success response.
-  (h) Authenticate server & send response.
+      (If it has not, do nothing -- this is why the physical
+      cluster also needs fencing!)
+  (g) Authenticate server & send response.
+  (h) If last-known host has been fenced, send success response.
 
 NOTE: There is always a possibility that a VM is started again
 before the fencing operation and checkpoint update for that VM
@@ -111,14 +113,70 @@
 
     dd if=/dev/urandom of=/etc/cluster/fence_xvm.key bs=4096 count=1
 
-Distribute the generated key file to all domUs in a cluster as well
-as all dom0s which will be hosting that particular cluster of domUs.
-The key should not be placed on shared file systems (because shared
-file systems require the cluster, which requires fencing...).
-
-Start fence_xvmd on all dom0s
+Distribute the generated key file to all virtual machines in a
+cluster as well as all physical host nodes which will be hosting
+that particular cluster of guests.  More simply, everything involved
+with hosting the virtual cluster as well as the virtual cluster
+itself must have the same key file; it acts as a password.
 
-Configure fence_xvm on the domU cluster...
-
-rest...tbd
+The key should not be placed on shared file systems (because shared
+file systems require the cluster, which requires fencing...).  
+Furthermore, it is considered 'unsupported' to join a host cluster
+and a guest cluster in one management domain.
+
+A. Configuring the host (physical) cluster
+
+On the host cluster, you need to add the following tag as a
+child of the <cluster> tag in /etc/cluster/cluster.conf:
+
+    <fence_xvmd/>
+
+(Do not forget to increment the configuration version number and
+run 'ccs_tool update /etc/cluster/cluster.conf' !).
+
+Start fence_xvmd on all host nodes if it isn't already running.
+Just run 'fence_xvmd'.  The next time the cluster is restarted,
+fence_xvmd will start automatically; it is started by the cman
+script if you have the above tag in cluster.conf.
+
+B. Configuring the guest (virtual) cluster
+
+On the guest cluster, you need to set up per-node fencing.  This
+is a fairly simple task as well.  First, you need to add a fence
+device for 'xvm'.  Simply add the following to the <fencedevices/>
+tag in the guest cluster's cluster.conf:
+
+    <fencedevice name="xvm" agent="fence_xvm"/>
+
+After doing this, each node also needs individual fencing set up.
+For each <clusternode/> tag, you will need to add something like
+the following:
+
+    <fence>
+        <method name="1">
+            <device name="xvm" domain="domain-name"/>
+        </method>
+    </fence>
+
+For example, if you have a virtual host named 'vm1.test.com' with a
+corresponding virtual domain name of 'domU-vm1' in the dom0 cluster,
+and a node ID of 1, the <clusternode> tag for that virtual machine
+would look like so:
+
+    <clusternode name="vm1.test.com" nodeid="1" votes="1">
+        <fence>
+            <method name="1">
+                <device name="xvm" domain="domU-vm1"/>
+            </method>
+        </fence>
+    </clusternode>
+
+C. Advanced configuration
+
+Any advanced configuration parameters (e.g. changing authentication,
+hashing, key file, etc.) should be included in the <fence_xvmd/> tag
+in the host cluster and the <fencedevice .../> tag in the guest
+cluster.  For a complete list of advanced parameters, see:
 
+    fence_xvmd -h
+    fence_xvm -h