All of lore.kernel.org
 help / color / mirror / Atom feed
From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/fence/agents/xvm fence_xvmd.c README
Date: 1 Dec 2006 22:14:41 -0000	[thread overview]
Message-ID: <20061201221441.32027.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-12-01 22:14:40

Modified files:
	fence/agents/xvm: fence_xvmd.c README 

Log message:
	Handle 0.1.9 case of libvirt returning a virDomainPtr + state for a VM that doesn't exist (vm state == VIR_DOMAIN_SHUTOFF)

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/fence_xvmd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/README.diff?cvsroot=cluster&r1=1.1&r2=1.2

--- cluster/fence/agents/xvm/fence_xvmd.c	2006/11/13 16:13:50	1.5
+++ cluster/fence/agents/xvm/fence_xvmd.c	2006/12/01 22:14:40	1.6
@@ -207,6 +207,54 @@
 }
 
 
+static inline int
+wait_domain(fence_req_t *req, virConnectPtr vp, int timeout)
+{
+	int tries = 0;
+	int response = 1;
+	virDomainPtr vdp;
+	virDomainInfo di;
+
+	if (!(vdp = get_domain(req, vp)))
+		return 0;
+
+	/* Check domain liveliness.  If the domain is still here,
+	   we return failure, and the client must then retry */
+	/* XXX On the xen 3.0.4 API, we will be able to guarantee
+	   synchronous virDomainDestroy, so this check will not
+	   be necessary */
+	do {
+		sleep(1);
+		vdp = get_domain(req, vp);
+		if (!vdp) {
+			dprintf(2, "Domain no longer exists\n");
+			response = 0;
+			break;
+		}
+
+		memset(&di, 0, sizeof(di));
+		virDomainGetInfo(vdp, &di);
+		virDomainFree(vdp);
+
+		if (di.state == VIR_DOMAIN_SHUTOFF) {
+			dprintf(2, "Domain has been shut off\n");
+			response = 0;
+			break;
+		}
+		
+		dprintf(4, "Domain still exists (state %d) after %d seconds\n",
+			di.state, tries);
+
+		if (++tries >= timeout)
+			break;
+	} while (1);
+
+	return response;
+}
+
+
+
+
 int
 do_fence_request_tcp(fence_req_t *req, fence_auth_type_t auth,
 		     void *key, size_t key_len, virConnectPtr vp)
@@ -235,24 +283,18 @@
 		break;
 	case FENCE_OFF:
 		printf("Destroying domain %s...\n", (char *)req->domain);
+
+		dprintf(2, "[OFF] Calling virDomainDestroy\n");
 		ret = virDomainDestroy(vdp);
 		if (ret < 0) {
-			/* raise_error(vp); */
+			printf("virDomainDestroy() failed: %d\n", ret);
 			break;
-		} else { 
-			sleep(1);
 		}
 
-		/* Check domain liveliness.  If the domain is still here,
-		   we return failure, and the client must then retry */
-		/* XXX On the xen 3.0.4 API, we will be able to guarantee
-		   synchronous virDomainDestroy, so this check will not
-		   be necessary */
-		vdp = get_domain(req, vp);
-		if (!vdp) {
-			response = 0;	/* Success! */
-		} else {
-			virDomainFree(vdp);
+		response = wait_domain(req, vp, 15);
+
+		if (response) {
+			printf("Domain still exists; fencing failed\n");
 		}
 		break;
 	case FENCE_REBOOT:
@@ -271,42 +313,26 @@
 			       "libvirt\n");
 		}
 
-		dprintf(2, "Calling virDomainDestroy\n");
+		dprintf(2, "[REBOOT] Calling virDomainDestroy\n");
 		ret = virDomainDestroy(vdp);
 		if (ret < 0) {
 			printf("virDomainDestroy() failed: %d\n", ret);
 			if (domain_desc)
 				free(domain_desc);
 			break;
-		} else {
-			/* Give it time for the operation to complete */
-			sleep(3);
 		}
 
-		/* Check domain liveliness.  If the domain is still here,
-		   we return failure, and the client must then retry */
-		/* XXX On the xen 3.0.4 API, we will be able to guarantee
-		   synchronous virDomainDestroy, so this check will not
-		   be necessary */
-		vdp = get_domain(req, vp);
-		if (!vdp) {
-			dprintf(2, "Domain no longer exists\n");
-			response = 0;	/* Success! */
-		} else {
-			printf("Domain still exists; fencing failed\n");
-			virDomainFree(vdp);
-			ret = 1;	/* Failed to kill it */
-		}
+		response = wait_domain(req, vp, 15);
 
-		/* Recreate the domain if possible */
-		if (ret == 0 && domain_desc) {
+		if (response) {
+			printf("Domain still exists; fencing failed\n");
+		} else if (domain_desc) {
+			/* Recreate the domain if possible */
 			/* Success */
 			dprintf(2, "Calling virDomainCreateLinux()...\n");
 			virDomainCreateLinux(vp, domain_desc, 0);
-		}
-
-		if (domain_desc)
 			free(domain_desc);
+		}
 		break;
 	}
 	
@@ -646,17 +672,14 @@
 	int mc_sock;
 	char key[4096];
 	int key_len = 0;
-	char *my_options = "dfi:a:p:C:c:k:u?hVX";
+	char *my_options = "dfi:a:p:C:c:k:u?hV";
 	void *h;
 
 	args_init(&args);
 	args_get_getopt(argc, argv, my_options, &args);
-	if (!(args.flags & F_NOCCS)) {
-		args_get_ccs(my_options, &args);
-	}
 	args_finalize(&args);
 	if (args.debug > 0) {
-		dset(args.debug);
+		_debug = args.debug;
 		args_print(&args);
 	}
 
--- cluster/fence/agents/xvm/README	2006/10/05 16:11:36	1.1
+++ cluster/fence/agents/xvm/README	2006/12/01 22:14:40	1.2
@@ -1,4 +1,4 @@
-I. Fence_xvm - the Xen virtual machine fencing agent
+I. Fence_xvm - virtual machine fencing agent
 
 Fence_xvm is an agent which establishes a communications link between
 a cluster of virtual machines (VC) and a cluster of domain0/physical
@@ -20,11 +20,11 @@
 cluster!).
 
 
-II. Fence_xvmd - The Xen virtual machine fencing host
+II. Fence_xvmd - The virtual machine fencing host
 
 Fence_xvmd is a daemon which runs on physical hosts (e.g. in domain0)
-of the cluster hosting the Xen virtual cluster.  It listens on a port
-for multicast traffic from Xen virtual cluster(s), and takes actions.
+of the cluster hosting the virtual cluster.  It listens on a port
+for multicast traffic from virtual cluster(s), and takes actions.
 Multiple disjoint virtual clusters can coexist on a single physical
 host cluster, but this requires multiple instances of fence_xvmd.
 
@@ -41,11 +41,11 @@
 last- known host is down, we must store the last-known locations of
 each virtual machine in some sort of cluster-wide way.  For this, we
 use the AIS Checkpointing API, which is provided by OpenAIS.  Every
-few seconds, fence_xvmd queries the Xen Hypervisor via libvirt and
+few seconds, fence_xvmd queries the hypervisor via libvirt and
 stores any local VM states in a checkpoint.  In the event of a
 physical node failure (which consequently causes the failure of one
-or more Xen guests), we can then read the checkpoint section
-corresponding to the guest we need to fence to find out the previous
+or more guests), we can then read the checkpoint section corresponding
+to the guest we need to fence to find out the previous
 owner.  With that information, we can then check with CMAN to see if
 the last-known host node has been fenced.  If so, then the VM is
 clean as well.  The physical cluster must, therefore, have fencing
@@ -71,8 +71,10 @@
   (e) Open connection to host contained within multicast
       packet.
   (f) Check with CMAN to see if last-known host has been fenced.
-  (g) If last-known host has been fenced, send success response.
-  (h) Authenticate server & send response.
+      (If it has not, do nothing -- this is why the physical
+      cluster also needs fencing!)
+  (g) Authenticate server & send response.
+  (h) If last-known host has been fenced, send success response.
 
 NOTE: There is always a possibility that a VM is started again
 before the fencing operation and checkpoint update for that VM
@@ -111,14 +113,70 @@
 
     dd if=/dev/urandom of=/etc/cluster/fence_xvm.key bs=4096 count=1
 
-Distribute the generated key file to all domUs in a cluster as well
-as all dom0s which will be hosting that particular cluster of domUs.
-The key should not be placed on shared file systems (because shared
-file systems require the cluster, which requires fencing...).
-
-Start fence_xvmd on all dom0s
+Distribute the generated key file to all virtual machines in a
+cluster as well as all physical host nodes which will be hosting
+that particular cluster of guests.  More simply, everything involved
+with hosting the virtual cluster as well as the virtual cluster
+itself must have the same key file; it acts as a password.
 
-Configure fence_xvm on the domU cluster...
-
-rest...tbd
+The key should not be placed on shared file systems (because shared
+file systems require the cluster, which requires fencing...).  
+Furthermore, it is considered 'unsupported' to join a host cluster
+and a guest cluster in one management domain.
+
+A. Configuring the host (physical) cluster
+
+On the host cluster, you need to add the following tag as a
+child of the <cluster> tag in /etc/cluster/cluster.conf:
+
+    <fence_xvmd/>
+
+(Do not forget to increment the configuration version number and
+run 'ccs_tool update /etc/cluster/cluster.conf' !).
+
+Start fence_xvmd on all host nodes if it isn't already running.
+Just run 'fence_xvmd'.  The next time the cluster is restarted,
+fence_xvmd will start automatically; it is started by the cman
+script if you have the above tag in cluster.conf.
+
+B. Configuring the guest (virtual) cluster
+
+On the guest cluster, you need to set up per-node fencing.  This
+is a fairly simple task as well.  First, you need to add a fence
+device for 'xvm'.  Simply add the following to the <fencedevices/>
+tag in the guest cluster's cluster.conf:
+
+    <fencedevice name="xvm" agent="fence_xvm"/>
+
+After doing this, each node also needs individual fencing set up.
+For each <clusternode/> tag, you will need to add something like
+the following:
+
+    <fence>
+        <method name="1">
+            <device name="xvm" domain="domain-name"/>
+        </method>
+    </fence>
+
+For example, if you have a virtual host named 'vm1.test.com' with a
+corresponding virtual domain name of 'domU-vm1' in the dom0 cluster,
+and a node ID of 1, the <clusternode> tag for that virtual machine
+would look like so:
+
+    <clusternode name="vm1.test.com" nodeid="1" votes="1">
+        <fence>
+            <method name="1">
+                <device name="xvm" domain="domU-vm1"/>
+            </method>
+        </fence>
+    </clusternode>
+
+C. Advanced configuration
+
+Any advanced configuration parameters (e.g. changing authentication,
+hashing, key file, etc.) should be included in the <fence_xvmd/> tag
+in the host cluster and the <fencedevice .../> tag in the guest
+cluster.  For a complete list of advanced parameters, see:
 
+    fence_xvmd -h
+    fence_xvm -h



                 reply	other threads:[~2006-12-01 22:14 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061201221441.32027.qmail@sourceware.org \
    --to=lhh@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.