[Cluster-devel] cluster/rgmanager ChangeLog include/message.h ...

All of lore.kernel.org
 help / color / mirror / Atom feed

From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/message.h  ...
Date: 23 Oct 2006 22:47:05 -0000	[thread overview]
Message-ID: <20061023224705.22916.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-10-23 22:47:01

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: message.h vf.h 
	rgmanager/src/clulib: lock.c message.c msg_cluster.c vft.c 
	rgmanager/src/daemons: groups.c main.c rg_forward.c rg_state.c 
	rgmanager/src/resources: Makefile 
Added files:
	rgmanager/src/resources: vm.sh 
Removed files:
	rgmanager/src/resources: xenvm.sh 

Log message:
	Fix #211701 (rgmanager + clustat hangs), #211933 (xenvm rename -> vm)

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.29&r2=1.30
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/message.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/vf.h.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/lock.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/message.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msg_cluster.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.23&r2=1.24
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.12&r2=1.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/xenvm.sh.diff?cvsroot=cluster&r1=1.4&r2=NONE

--- cluster/rgmanager/ChangeLog	2006/10/06 21:23:40	1.29
+++ cluster/rgmanager/ChangeLog	2006/10/23 22:47:00	1.30
@@ -1,3 +1,8 @@
+2006-10-23 Lon Hohberger <lhh@redhat.com>
+	* src/resources/xenvm.sh: Gone; replaced with vm.sh.
+	* The C code parts: Fix deadlocks incurred while testing
+	rgmanager on larger node counts. #211701
+
 2006-10-06 Lon Hohberger <lhh@redhat.com>
 	* src/daemons/main.c: Fix #202492: provide rgmanager's view of
 	who is running rgmanager to clustat.
--- cluster/rgmanager/include/message.h	2006/08/07 22:05:01	1.3
+++ cluster/rgmanager/include/message.h	2006/10/23 22:47:00	1.4
@@ -74,6 +74,7 @@
 	msgctx_type_t type;
 	int flags;
 	/* XXX todo make this opaque */
+	void *sp;
 	union {
 		struct {
 			msg_q_t *queue;
--- cluster/rgmanager/include/vf.h	2006/09/01 19:02:21	1.6
+++ cluster/rgmanager/include/vf.h	2006/10/23 22:47:00	1.7
@@ -148,6 +148,7 @@
 
 /* Return codes for vf_handle_msg... */
 #define VFR_ERROR	100
+#define VFR_TIMEOUT	101
 #define VFR_OK		0
 #define VFR_YES		VFR_OK
 #define VFR_NO		1
--- cluster/rgmanager/src/clulib/lock.c	2006/07/11 23:52:41	1.2
+++ cluster/rgmanager/src/clulib/lock.c	2006/10/23 22:47:00	1.3
@@ -176,6 +176,12 @@
 
 	block = !(options & LKF_NOQUEUE);
 
+	errno = EINVAL;
+	if (!lksb)
+		return -1;
+
+	memset(lksb, 0, sizeof(struct dlm_lksb));
+
 	/*
 	   Try to use a conversion lock mechanism when possible
 	   If the caller calls explicitly with a NULL lock, then
--- cluster/rgmanager/src/clulib/message.c	2006/08/07 22:05:01	1.3
+++ cluster/rgmanager/src/clulib/message.c	2006/10/23 22:47:00	1.4
@@ -184,6 +184,9 @@
 		return -1;
 	}
 
+	/* Record where this was called, in case we have to debug */
+	ctx->sp = __builtin_return_address(0);
+
 	if (ctx->ops && ctx->ops->mo_open)
 		return ctx->ops->mo_open(ctx->type, nodeid, port, ctx, timeout);
 	errno = ENOSYS;
--- cluster/rgmanager/src/clulib/msg_cluster.c	2006/08/08 15:01:27	1.3
+++ cluster/rgmanager/src/clulib/msg_cluster.c	2006/10/23 22:47:00	1.4
@@ -33,6 +33,7 @@
 #include <signals.h>
 #include <gettid.h>
 #include <cman-private.h>
+#include <clulog.h>
 
 /* Ripped from ccsd's setup_local_socket */
 
@@ -711,9 +712,10 @@
 {
 	msg_q_t *node;
 
-	if (ctx->type == MSG_NONE) {
-		printf("Queue_for_context called w/o valid context\n");
-		raise(SIGSEGV);
+	if (ctx->type != MSG_CLUSTER) {
+		clulog(LOG_WARNING, "%s called on invalid context %p\n",
+		       __FUNCTION__, ctx);
+		return;
 	}
 
 	while ((node = malloc(sizeof(*node))) == NULL) {
@@ -826,8 +828,15 @@
 			}
 		}
 #endif
-		
-		queue_for_context(contexts[m->dest_ctx], buf, len);
+		if (m->msg_control == M_CLOSE &&
+		    contexts[m->dest_ctx]->type != MSG_CLUSTER) {
+			/* XXX Work around bug where M_CLOSE is called
+			   on a context which has been destroyed */
+			clulog(LOG_WARNING, "Ignoring M_CLOSE for destroyed "
+			       "context %d\n", m->dest_ctx);
+		} else {
+			queue_for_context(contexts[m->dest_ctx], buf, len);
+		}
 	}
 	/* If none of the above, then we msg for something we've already
 	   detached from our list.  No big deal, just ignore. */
--- cluster/rgmanager/src/clulib/vft.c	2006/09/01 19:02:22	1.16
+++ cluster/rgmanager/src/clulib/vft.c	2006/10/23 22:47:00	1.17
@@ -301,24 +301,15 @@
 	     int timeout)
 {
 	generic_msg_hdr response;
-	struct timeval tv;
 	int x;
 
-	/* Set up for the select */
-	tv.tv_sec = timeout;
-	tv.tv_usec = 0;
-
-	/*
-	 * Wait for activity
-	 */
-	
 	/*
 	 * Flag hosts which we received messages from so we don't
 	 * read a second message.
 	 */
 	while (remain && timeout) {
 
-		if (msg_wait(mcast_ctx, 5) <= 0) {
+		if (msg_wait(mcast_ctx, 1) <= 0) {
 			--timeout;
 			continue;
 		}
@@ -355,7 +346,7 @@
 #ifdef DEBUG
 			printf("VF: Abort: someone voted NO\n");
 #endif
-			return 0;
+			return VFR_ABORT;
 		}
 
 #ifdef DEBUG
@@ -368,14 +359,14 @@
 #ifdef DEBUG
 		printf("VF: Timed out waiting for %d responses\n", remain);
 #endif
-		return 0;
+		return VFR_TIMEOUT;
 	}
 		
 
 	/*
 	 * Whohoooooooo!
 	 */
-	return 1;
+	return VFR_OK;
 }
 
 
@@ -884,6 +875,7 @@
 	}
 
 	msg_close(ctx);
+	msg_free_ctx(ctx);
 	return NULL;
 }
 
@@ -910,7 +902,7 @@
 		sleep(1);
 
 	if (msg_open(MSG_CLUSTER, 0, my_port, ctx, 1) < 0) {
-		free(ctx);	
+		msg_free_ctx(ctx);	
 		free(args);
 		return -1;
 	}
@@ -975,10 +967,6 @@
 int
 vf_shutdown(void)
 {
-	key_node_t *c_key;
-	view_node_t *c_jv;
-	commit_node_t *c_cn;
-
 	pthread_mutex_lock(&vf_mutex);
 	vf_thread_ready = 0;
 	pthread_cancel(vf_thread);
@@ -1138,7 +1126,6 @@
 	if (!data || !datalen || !keyid || !strlen(keyid) || !membership)
 		return -1;
 
-
 	pthread_mutex_lock(&vf_mutex);
 	if (!trans) {
 		trans = _node_id << 16;
@@ -1231,7 +1218,7 @@
 	 * See if we have a consensus =)
 	 */
 	if ((rv = (vf_unanimous(&everyone, trans, remain,
-				5)))) {
+				5))) == VFR_OK) {
 		vf_send_commit(&everyone, trans);
 #ifdef DEBUG
 		printf("VF: Consensus reached!\n");
@@ -1253,7 +1240,7 @@
 	pthread_mutex_unlock(&vf_mutex);
 
 #ifdef DEBUG
-	if (rv) {
+	if (rv == VFR_OK) {
 		getuptime(&end);
 
 		dif.tv_usec = end.tv_usec - start.tv_usec;
@@ -1269,7 +1256,7 @@
 	}
 #endif
 
-	return (rv?0:-1);
+	return rv;
 }
 
 
@@ -1595,7 +1582,7 @@
 			VFR_OK : VFR_ERROR;
 
 	swab_vf_msg_t(msg);
-	ret = (msg_send(ctx, msg, totallen) != -1)?VFR_OK:VFR_ERROR;
+	ret = (msg_send(ctx, msg, totallen) >= 0)?VFR_OK:VFR_ERROR;
 	free(msg);
 	return ret;
 }
@@ -1697,14 +1684,15 @@
 		       //msg->vm_msg.vf_keyid,
 		       //(int)membership->cml_members[x].cn_nodeid);
 
-		if (msg_send(&ctx, msg, sizeof(*msg)) != sizeof(*msg)) {
+		if (msg_send(&ctx, msg, sizeof(*msg)) < sizeof(*msg)) {
 			printf("Couldn't send entire message\n");
+			msg_close(&ctx);
 			continue;
 		}
 
 		gh = NULL;
 		if ((n = msg_receive_simple(&ctx, (generic_msg_hdr **)&gh, 10))
-		    == -1) {
+		    < 0) {
 			if (gh)
 				free(gh);
 			msg_close(&ctx);
--- cluster/rgmanager/src/daemons/groups.c	2006/10/06 21:22:27	1.24
+++ cluster/rgmanager/src/daemons/groups.c	2006/10/23 22:47:01	1.25
@@ -79,6 +79,7 @@
 count_resource_groups(cluster_member_list_t *ml)
 {
 	resource_t *res;
+	resource_node_t *node;
 	char rgname[64], *val;
 	int x;
 	rg_state_t st;
@@ -92,7 +93,9 @@
 
 	pthread_rwlock_rdlock(&resource_lock);
 
-	list_do(&_resources, res) {
+	list_do(&_tree, node) {
+
+		res = node->rn_resource;
 
 		res_build_name(rgname, sizeof(rgname), res);
 
@@ -128,7 +131,7 @@
 			++mp->cn_svcexcl;
 		}
 
-	} while (!list_done(&_resources, res));
+	} while (!list_done(&_tree, node));
 
 	pthread_rwlock_unlock(&resource_lock);
 	return 0;
@@ -527,7 +530,7 @@
 	int depend;
 
 	if (rg_locked()) {
-		clulog(LOG_NOTICE,
+		clulog(LOG_DEBUG,
 			"Resource groups locked; not evaluating\n");
 		return -EAGAIN;
 	}
@@ -1090,8 +1093,20 @@
 		if (curr->rn_resource->r_flags & RF_NEEDSTART)
 			need_init = 1;
 
-		if (get_rg_state_local(rg, &svcblk) < 0)
-			continue;
+		if (!need_init) {
+			if (get_rg_state_local(rg, &svcblk) < 0)
+				continue;
+		} else {
+			if (rg_lock(rg, &lockp) != 0)
+				continue;
+
+			if (get_rg_state(rg, &svcblk) < 0) {
+				rg_unlock(&lockp);
+				continue;
+			}
+
+			rg_unlock(&lockp);
+		}
 
 		if (!need_init && svcblk.rs_owner != my_id())
 			continue;
--- cluster/rgmanager/src/daemons/main.c	2006/10/06 21:22:27	1.33
+++ cluster/rgmanager/src/daemons/main.c	2006/10/23 22:47:01	1.34
@@ -36,6 +36,7 @@
 #include <malloc.h>
 #include <cman-private.h>
 
+#define L_SHUTDOWN (1<<2)
 #define L_SYS (1<<1)
 #define L_USER (1<<0)
 
@@ -59,6 +60,7 @@
 char debug = 0; /* XXX* */
 static int signalled = 0;
 static int port = RG_PORT;
+static char *rgmanager_lsname = "rgmanager"; /* XXX default */
 
 int next_node_id(cluster_member_list_t *membership, int me);
 int rg_event_q(char *svcName, uint32_t state, int owner);
@@ -479,7 +481,7 @@
 			/* No such service! */
 			swab_SmMessageSt(msg_sm);
 
-			if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) !=
+			if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) <
 		    	    sizeof (SmMessageSt))
 				clulog(LOG_ERR, "#40: Error replying to "
 				       "action request.\n");
@@ -832,7 +834,7 @@
 void *
 shutdown_thread(void *arg)
 {
-	rg_lockall(L_SYS);
+	rg_lockall(L_SYS|L_SHUTDOWN);
 	rg_doall(RG_STOP_EXITING, 1, NULL);
 	running = 0;
 
@@ -886,7 +888,7 @@
 		return -1;
 	}
 
-	if (clu_lock_init("rgmanager") != 0) {
+	if (clu_lock_init(rgmanager_lsname) != 0) {
 		printf("Locks not working!\n");
 		return -1;
 	}
@@ -982,6 +984,7 @@
 
 	cleanup(cluster_ctx);
 	clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
+	clu_lock_finished(rgmanager_lsname);
 	cman_finish(clu);
 	
 	/*malloc_dump_table(); */ /* Only works if alloc.c us used */
--- cluster/rgmanager/src/daemons/rg_forward.c	2006/09/27 18:58:53	1.7
+++ cluster/rgmanager/src/daemons/rg_forward.c	2006/10/23 22:47:01	1.8
@@ -48,22 +48,15 @@
 	rg_state_t rgs;
 	request_t *req = (request_t *)arg;
 	struct dlm_lksb lockp;
-	msgctx_t ctx;
+	msgctx_t *ctx = NULL;
 	SmMessageSt msg;
 
-	if (rg_lock(req->rr_group, &lockp) != 0) {
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	if (rg_lock(req->rr_group, &lockp) != 0)
+		goto out_fail;
 
 	if (get_rg_state(req->rr_group, &rgs) != 0) {
 		rg_unlock(&lockp);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
+		goto out_fail;
 	}
 
 	rg_unlock(&lockp);
@@ -84,35 +77,32 @@
 	clulog(LOG_DEBUG, "Forwarding %s request to %d\n",
 	       rg_req_str(req->rr_request), rgs.rs_owner);
 
-	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, &ctx, 10) < 0)  {
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	while ((ctx = msg_new_ctx()) == NULL)
+		sleep(1);
 
-	if (msg_send(&ctx, &msg, sizeof(msg)) != sizeof(msg)) {
-		msg_close(&ctx);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0)
+		goto out_fail;
+	if (msg_send(ctx, &msg, sizeof(msg)) < sizeof(msg))
+		goto out_fail;
+	if (msg_receive(ctx, &msg, sizeof(msg), 600) < sizeof(msg))
+		goto out_fail;
 
-	if (msg_receive(&ctx, &msg, sizeof(msg), 600) != sizeof(msg)) {
-		msg_close(&ctx);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
-	msg_close(&ctx);
+	msg_close(ctx);
+	msg_free_ctx(ctx);
 
 	swab_SmMessageSt(&msg);
 	send_response(msg.sm_data.d_ret, req);
-
 	rq_free(req);
-
+	pthread_exit(NULL);
+	
+out_fail: /* Failure path */
+	if (ctx) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+	msg_close(req->rr_resp_ctx);
+	msg_free_ctx(req->rr_resp_ctx);
+	rq_free(req);
 	pthread_exit(NULL);
 }
 
--- cluster/rgmanager/src/daemons/rg_state.c	2006/09/28 20:01:56	1.23
+++ cluster/rgmanager/src/daemons/rg_state.c	2006/10/23 22:47:01	1.24
@@ -245,7 +245,7 @@
 	char res[256];
 #ifndef OPENAIS
 	cluster_member_list_t *membership;
-	int ret;
+	int ret, tries = 0;
 #endif
 
 	if (name)
@@ -257,11 +257,16 @@
 		return -1;
 	return 0;
 #else
-	membership = member_list();
-	ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
-       		       sizeof(*svcblk));
-	free_member_list(membership);
-	return ret;
+	do {
+		/* Retry up to 3 times just in case members transition
+		   while we're trying to commit something */
+		membership = member_list();
+		ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
+       		       	       sizeof(*svcblk));
+		free_member_list(membership);
+	} while (ret == VFR_TIMEOUT && ++tries < 3);
+
+	return (ret==VFR_OK?0:-1);
 #endif
 }
 
@@ -1193,7 +1198,7 @@
 	swab_SmMessageSt(&msg_relo);
 
 	/* Send relocate message to the other node */
-	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) !=
+	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) < 
 	    sizeof (SmMessageSt)) {
 		clulog(LOG_ERR,
 		       "#59: Error sending relocate request to member #%d\n",
/cvs/cluster/cluster/rgmanager/src/resources/vm.sh,v  -->  standard output
revision 1.1
--- cluster/rgmanager/src/resources/vm.sh
+++ -	2006-10-23 22:47:04.631911000 +0000
@@ -0,0 +1,422 @@
+#!/bin/bash
+#
+#  Copyright Red Hat Inc., 2005-2006
+#
+#  This program is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by the
+#  Free Software Foundation; either version 2, or (at your option) any
+#  later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; see the file COPYING.  If not, write to the
+#  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+#  MA 02139, USA.
+#
+
+PATH=/bin:/sbin:/usr/bin:/usr/sbin
+
+export PATH
+
+#
+# Virtual Machine start/stop script (requires the xm command)
+#
+
+meta_data()
+{
+    cat <<EOT
+<?xml version="1.0"?>
+<resource-agent version="rgmanager 2.0" name="vm">
+    <version>1.0</version>
+
+    <longdesc lang="en">
+	Defines a Virtual Machine
+    </longdesc>
+    <shortdesc lang="en">
+        Defines a Virtual Machine
+    </shortdesc>
+
+    <parameters>
+        <parameter name="name" primary="1">
+            <longdesc lang="en">
+                This is the name of the virtual machine.
+            </longdesc>
+            <shortdesc lang="en">
+                Name
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+    
+        <parameter name="domain">
+            <longdesc lang="en">
+                Fail over domains define lists of cluster members
+                to try in the event that the host of the virtual machine
+		fails.
+            </longdesc>
+            <shortdesc lang="en">
+                Cluster Fail Over Domain
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+        <parameter name="autostart">
+            <longdesc lang="en">
+	    	If set to yes, this resource group will automatically be started
+		after the cluster forms a quorum.  If set to no, this virtual
+		machine will start in the 'disabled' state after the cluster
+		forms a quorum.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Automatic start after quorum formation
+            </shortdesc>
+            <content type="boolean"/>
+        </parameter>
+
+        <parameter name="recovery" reconfig="1">
+            <longdesc lang="en">
+	        This currently has three possible options: "restart" tries
+		to restart this virtual machine locally before
+		attempting to relocate (default); "relocate" does not bother
+		trying to restart the VM locally; "disable" disables
+		the VM if it fails.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Failure recovery policy
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="memory" reconfig="1">
+	    <longdesc lang="en">
+		Memory size.  This can be reconfigured on the fly.
+	    </longdesc>
+	    <shortdesc lang="en">
+		Memory Size
+	    </shortdesc>
+            <content type="integer"/>
+        </parameter>
+
+	<parameter name="bootloader">
+	    <longdesc lang="en">
+		Boot loader that can start the VM from physical image
+	    </longdesc>
+	    <shortdesc lang="en">
+		Boot loader that can start the VM from physical image
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="path">
+	    <longdesc lang="en">
+	    	Path specification 'xm create' will search for the specified
+		VM configuration file
+	    </longdesc>
+	    <shortdesc lang="en">
+	    	Path to virtual machine configuration files
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+	<parameter name="rootdisk_physical" unique="1">
+	    <longdesc lang="en">
+		Root disk for the virtual machine.  (physical, on the host)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Root disk (physical)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+        
+	<parameter name="rootdisk_virtual">
+	    <longdesc lang="en">
+		Root disk for the virtual machine.  (as presented to the VM)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Root disk (virtual)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+	<parameter name="swapdisk_physical" unique="1">
+	    <longdesc lang="en">
+		Swap disk for the virtual machine.  (physical, on the host)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Swap disk (physical)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+        
+	<parameter name="swapdisk_virtual">
+	    <longdesc lang="en">
+		Swap disk for the virtual machine.  (as presented to the VM)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Swap disk (virtual)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="vif">
+	    <longdesc lang="en">
+		Virtual interface MAC address
+	    </longdesc>
+	    <shortdesc lang="en">
+		Virtual interface MAC address
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+    </parameters>
+
+    <actions>
+        <action name="start" timeout="20"/>
+        <action name="stop" timeout="120"/>
+	
+	<!-- No-ops.  Groups are abstract resource types.  -->
+        <action name="status" timeout="10" interval="30m"/>
+        <action name="monitor" timeout="10" interval="30m"/>
+
+	<!-- reconfigure - reconfigure with new OCF parameters.
+	     NOT OCF COMPATIBLE AT ALL -->
+	<action name="reconfig" timeout="10"/>
+
+	<!-- Suspend: if available, suspend this resource instead of
+	     doing a full stop. -->
+	<!-- <action name="suspend" timeout="10m"/> -->
+	<action name="migrate" timeout="10m"/>
+
+        <action name="meta-data" timeout="5"/>
+        <action name="verify-all" timeout="5"/>
+
+    </actions>
+    
+    <special tag="rgmanager">
+        <attributes maxinstances="1"/>
+    </special>
+</resource-agent>
+EOT
+}
+
+
+build_xm_cmdline()
+{
+	#
+	# Virtual domains should never restart themselves when 
+	# controlled externally; the external monitoring app
+	# should.
+	#
+	declare cmdline="restart=\"never\""
+	declare varp val temp
+
+	#
+	# Transliterate the OCF_RESKEY_* to something the xm
+	# command can recognize.
+	#
+	for var in ${!OCF_RESKEY_*}; do
+		varp=${var/OCF_RESKEY_/}
+		val=`eval "echo \\$$var"`
+
+		case $varp in
+		bootloader)
+			cmdline="$cmdline bootloader=\"$val\""
+			;;
+		rootdisk_physical)
+			[ -n "$OCF_RESKEY_rootdisk_virtual" ] || exit 2
+			cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_rootdisk_virtual,w\""
+			;;
+		swapdisk_physical)
+			[ -n "$OCF_RESKEY_swapdisk_virtual" ] || exit 2
+			cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_swapdisk_virtual,w\""
+			;;
+		vif)
+			cmdline="$cmdline vif=\"mac=$val\""
+			;;
+		recovery|autostart|domain)
+			;;
+		memory)
+			cmdline="$cmdline $varp=$val"
+			;;
+		swapdisk_virtual)
+			;;
+		rootdisk_virtual)
+			;;
+		name)	# Do nothing with name; add it later
+			;;
+		path)
+			cmdline="$cmdline --path=\"$val\""
+			;;
+		*)
+			cmdline="$cmdline $varp=\"$val\""
+			;;
+		esac
+	done
+
+	if [ -n "$OCF_RESKEY_name" ]; then
+		cmdline="$OCF_RESKEY_name $cmdline"
+	fi
+
+	echo $cmdline
+}
+
+
+#
+# Start a virtual machine given the parameters from
+# the environment.
+#
+start()
+{
+	# Intended: use /dev/null for the configuration file if xmdefconfig
+	# doesn't exist.  NOTE(review): nothing below implements that yet.
+	#
+	declare cmdline
+
+	cmdline="`build_xm_cmdline`"
+
+	echo "# xm command line: $cmdline"
+
+	eval xm create $cmdline
+	return $?
+}
+
+
+#
+# Stop a VM.  Try to shut it down.  Wait a bit, and if it
+# doesn't shut down, destroy it.
+#
+stop()
+{
+	declare -i timeout=60
+	declare -i ret=1
+	declare st
+
+	for op in $*; do
+		echo xm $op $OCF_RESKEY_name ...
+		xm $op $OCF_RESKEY_name
+
+		timeout=60
+		while [ $timeout -gt 0 ]; do
+			sleep 5
+			((timeout -= 5))
+			status || return 0
+			while read dom state; do
+				#
+				# State is "stopped".  Kill it.
+				#
+				if [ "$dom" != "$OCF_RESKEY_name" ]; then
+					continue
+				fi
+				if [ "$state" != "---s-" ]; then
+					continue
+				fi
+				xm destroy $OCF_RESKEY_name
+			done < <(xm list | awk '{print $1, $5}')
+		done
+	done
+
+	return 1
+}
+
+
+#
+# Reconfigure a running VM.  Currently, all we support is
+# memory ballooning.
+#
+reconfigure()
+{
+	if [ -n "$OCF_RESKEY_memory" ]; then
+		echo "xm balloon $OCF_RESKEY_name $OCF_RESKEY_memory"
+		xm balloon $OCF_RESKEY_name $OCF_RESKEY_memory
+		return $?
+	fi
+	return 0
+}
+
+
+#
+# Simple status check: Find the VM in the list of running
+# VMs
+#
+status()
+{
+	xm list $OCF_RESKEY_name &> /dev/null
+	return $?
+}
+
+
+verify_all()
+{
+	declare errors=0
+
+	if [ -n "$OCF_RESKEY_bootloader" ] && \
+	   ! [ -x "$OCF_RESKEY_bootloader" ]; then
+		echo "$OCF_RESKEY_bootloader is not executable"
+		((errors++))
+	fi
+}
+
+
+migrate()
+{
+	declare target=$1
+
+	# XXX TODO
+	return 1
+}
+
+#
+# Dispatch on the requested operation ($1).  Unlike an abstract resource
+# group, this agent does real work: it drives the VM through the xm command.
+#
+case $1 in
+	start)
+		start
+		exit $?
+		;;
+	stop)
+		stop shutdown destroy
+		exit $?
+		;;
+	kill)
+		stop destroy
+		exit $?
+		;;
+	recover|restart)
+		exit 0
+		;;
+	status|monitor)
+		status
+		exit $?
+		;;
+	migrate)
+		migrate $2 # Send VM to this node
+		exit $?
+		;;
+	reload)
+		exit 0
+		;;
+	reconfig)
+		echo "$0 RECONFIGURING $OCF_RESKEY_memory"
+		reconfigure
+		exit $?
+		;;
+	meta-data)
+		meta_data
+		exit 0
+		;;
+	verify-all)
+		verify_all
+		exit $?
+		;;
+	*)
+		echo "usage: $0 {start|stop|restart|status|reload|reconfig|meta-data|verify-all}"
+		exit 1
+		;;
+esac
--- cluster/rgmanager/src/resources/Makefile	2006/07/12 15:43:56	1.12
+++ cluster/rgmanager/src/resources/Makefile	2006/10/23 22:47:01	1.13
@@ -18,7 +18,7 @@
 INCLUDE += -I $(top_srcdir)/include
 
 RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
-	script.sh netfs.sh clusterfs.sh smb.sh xenvm.sh
+	script.sh netfs.sh clusterfs.sh smb.sh vm.sh
 
 TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock

                 reply	other threads:[~2006-10-23 22:47 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061023224705.22916.qmail@sourceware.org \
    --to=lhh@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.