From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/message.h ...
Date: 23 Oct 2006 22:47:05 -0000 [thread overview]
Message-ID: <20061023224705.22916.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2006-10-23 22:47:01
Modified files:
rgmanager : ChangeLog
rgmanager/include: message.h vf.h
rgmanager/src/clulib: lock.c message.c msg_cluster.c vft.c
rgmanager/src/daemons: groups.c main.c rg_forward.c rg_state.c
rgmanager/src/resources: Makefile
Added files:
rgmanager/src/resources: vm.sh
Removed files:
rgmanager/src/resources: xenvm.sh
Log message:
Fix #211701 (rgmanager + clustat hangs), #211933 (xenvm rename -> vm)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.29&r2=1.30
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/message.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/vf.h.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/lock.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/message.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msg_cluster.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.23&r2=1.24
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.12&r2=1.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/xenvm.sh.diff?cvsroot=cluster&r1=1.4&r2=NONE
--- cluster/rgmanager/ChangeLog 2006/10/06 21:23:40 1.29
+++ cluster/rgmanager/ChangeLog 2006/10/23 22:47:00 1.30
@@ -1,3 +1,8 @@
+2006-10-23 Lon Hohberger <lhh@redhat.com>
+ * src/resources/xenvm.sh: Gone; replaced with vm.sh.
+ * The C code parts: Fix deadlocks incurred while testing
+ rgmanager on larger node counts. #211701
+
2006-10-06 Lon Hohberger <lhh@redhat.com>
* src/daemons/main.c: Fix #202492: provide rgmanager's view of
who is running rgmanager to clustat.
--- cluster/rgmanager/include/message.h 2006/08/07 22:05:01 1.3
+++ cluster/rgmanager/include/message.h 2006/10/23 22:47:00 1.4
@@ -74,6 +74,7 @@
msgctx_type_t type;
int flags;
/* XXX todo make this opaque */
+ void *sp;
union {
struct {
msg_q_t *queue;
--- cluster/rgmanager/include/vf.h 2006/09/01 19:02:21 1.6
+++ cluster/rgmanager/include/vf.h 2006/10/23 22:47:00 1.7
@@ -148,6 +148,7 @@
/* Return codes for vf_handle_msg... */
#define VFR_ERROR 100
+#define VFR_TIMEOUT 101
#define VFR_OK 0
#define VFR_YES VFR_OK
#define VFR_NO 1
--- cluster/rgmanager/src/clulib/lock.c 2006/07/11 23:52:41 1.2
+++ cluster/rgmanager/src/clulib/lock.c 2006/10/23 22:47:00 1.3
@@ -176,6 +176,12 @@
block = !(options & LKF_NOQUEUE);
+ errno = EINVAL;
+ if (!lksb)
+ return -1;
+
+ memset(lksb, 0, sizeof(struct dlm_lksb));
+
/*
Try to use a conversion lock mechanism when possible
If the caller calls explicitly with a NULL lock, then
--- cluster/rgmanager/src/clulib/message.c 2006/08/07 22:05:01 1.3
+++ cluster/rgmanager/src/clulib/message.c 2006/10/23 22:47:00 1.4
@@ -184,6 +184,9 @@
return -1;
}
+ /* Record where this was called, in case we have to debug */
+ ctx->sp = __builtin_return_address(0);
+
if (ctx->ops && ctx->ops->mo_open)
return ctx->ops->mo_open(ctx->type, nodeid, port, ctx, timeout);
errno = ENOSYS;
--- cluster/rgmanager/src/clulib/msg_cluster.c 2006/08/08 15:01:27 1.3
+++ cluster/rgmanager/src/clulib/msg_cluster.c 2006/10/23 22:47:00 1.4
@@ -33,6 +33,7 @@
#include <signals.h>
#include <gettid.h>
#include <cman-private.h>
+#include <clulog.h>
/* Ripped from ccsd's setup_local_socket */
@@ -711,9 +712,10 @@
{
msg_q_t *node;
- if (ctx->type == MSG_NONE) {
- printf("Queue_for_context called w/o valid context\n");
- raise(SIGSEGV);
+ if (ctx->type != MSG_CLUSTER) {
+ clulog(LOG_WARNING, "%s called on invalid context %p\n",
+ __FUNCTION__, ctx);
+ return;
}
while ((node = malloc(sizeof(*node))) == NULL) {
@@ -826,8 +828,15 @@
}
}
#endif
-
- queue_for_context(contexts[m->dest_ctx], buf, len);
+ if (m->msg_control == M_CLOSE &&
+ contexts[m->dest_ctx]->type != MSG_CLUSTER) {
+ /* XXX Work around bug where M_CLOSE is called
+ on a context which has been destroyed */
+ clulog(LOG_WARNING, "Ignoring M_CLOSE for destroyed "
+ "context %d\n", m->dest_ctx);
+ } else {
+ queue_for_context(contexts[m->dest_ctx], buf, len);
+ }
}
/* If none of the above, then we msg for something we've already
detached from our list. No big deal, just ignore. */
--- cluster/rgmanager/src/clulib/vft.c 2006/09/01 19:02:22 1.16
+++ cluster/rgmanager/src/clulib/vft.c 2006/10/23 22:47:00 1.17
@@ -301,24 +301,15 @@
int timeout)
{
generic_msg_hdr response;
- struct timeval tv;
int x;
- /* Set up for the select */
- tv.tv_sec = timeout;
- tv.tv_usec = 0;
-
- /*
- * Wait for activity
- */
-
/*
* Flag hosts which we received messages from so we don't
* read a second message.
*/
while (remain && timeout) {
- if (msg_wait(mcast_ctx, 5) <= 0) {
+ if (msg_wait(mcast_ctx, 1) <= 0) {
--timeout;
continue;
}
@@ -355,7 +346,7 @@
#ifdef DEBUG
printf("VF: Abort: someone voted NO\n");
#endif
- return 0;
+ return VFR_ABORT;
}
#ifdef DEBUG
@@ -368,14 +359,14 @@
#ifdef DEBUG
printf("VF: Timed out waiting for %d responses\n", remain);
#endif
- return 0;
+ return VFR_TIMEOUT;
}
/*
* Whohoooooooo!
*/
- return 1;
+ return VFR_OK;
}
@@ -884,6 +875,7 @@
}
msg_close(ctx);
+ msg_free_ctx(ctx);
return NULL;
}
@@ -910,7 +902,7 @@
sleep(1);
if (msg_open(MSG_CLUSTER, 0, my_port, ctx, 1) < 0) {
- free(ctx);
+ msg_free_ctx(ctx);
free(args);
return -1;
}
@@ -975,10 +967,6 @@
int
vf_shutdown(void)
{
- key_node_t *c_key;
- view_node_t *c_jv;
- commit_node_t *c_cn;
-
pthread_mutex_lock(&vf_mutex);
vf_thread_ready = 0;
pthread_cancel(vf_thread);
@@ -1138,7 +1126,6 @@
if (!data || !datalen || !keyid || !strlen(keyid) || !membership)
return -1;
-
pthread_mutex_lock(&vf_mutex);
if (!trans) {
trans = _node_id << 16;
@@ -1231,7 +1218,7 @@
* See if we have a consensus =)
*/
if ((rv = (vf_unanimous(&everyone, trans, remain,
- 5)))) {
+ 5))) == VFR_OK) {
vf_send_commit(&everyone, trans);
#ifdef DEBUG
printf("VF: Consensus reached!\n");
@@ -1253,7 +1240,7 @@
pthread_mutex_unlock(&vf_mutex);
#ifdef DEBUG
- if (rv) {
+ if (rv == VFR_OK) {
getuptime(&end);
dif.tv_usec = end.tv_usec - start.tv_usec;
@@ -1269,7 +1256,7 @@
}
#endif
- return (rv?0:-1);
+ return rv;
}
@@ -1595,7 +1582,7 @@
VFR_OK : VFR_ERROR;
swab_vf_msg_t(msg);
- ret = (msg_send(ctx, msg, totallen) != -1)?VFR_OK:VFR_ERROR;
+ ret = (msg_send(ctx, msg, totallen) >= 0)?VFR_OK:VFR_ERROR;
free(msg);
return ret;
}
@@ -1697,14 +1684,15 @@
//msg->vm_msg.vf_keyid,
//(int)membership->cml_members[x].cn_nodeid);
- if (msg_send(&ctx, msg, sizeof(*msg)) != sizeof(*msg)) {
+ if (msg_send(&ctx, msg, sizeof(*msg)) < (int)sizeof(*msg)) { /* int compare so a -1 error is caught */
printf("Couldn't send entire message\n");
+ msg_close(&ctx);
continue;
}
gh = NULL;
if ((n = msg_receive_simple(&ctx, (generic_msg_hdr **)&gh, 10))
- == -1) {
+ < 0) {
if (gh)
free(gh);
msg_close(&ctx);
--- cluster/rgmanager/src/daemons/groups.c 2006/10/06 21:22:27 1.24
+++ cluster/rgmanager/src/daemons/groups.c 2006/10/23 22:47:01 1.25
@@ -79,6 +79,7 @@
count_resource_groups(cluster_member_list_t *ml)
{
resource_t *res;
+ resource_node_t *node;
char rgname[64], *val;
int x;
rg_state_t st;
@@ -92,7 +93,9 @@
pthread_rwlock_rdlock(&resource_lock);
- list_do(&_resources, res) {
+ list_do(&_tree, node) {
+
+ res = node->rn_resource;
res_build_name(rgname, sizeof(rgname), res);
@@ -128,7 +131,7 @@
++mp->cn_svcexcl;
}
- } while (!list_done(&_resources, res));
+ } while (!list_done(&_tree, node));
pthread_rwlock_unlock(&resource_lock);
return 0;
@@ -527,7 +530,7 @@
int depend;
if (rg_locked()) {
- clulog(LOG_NOTICE,
+ clulog(LOG_DEBUG,
"Resource groups locked; not evaluating\n");
return -EAGAIN;
}
@@ -1090,8 +1093,20 @@
if (curr->rn_resource->r_flags & RF_NEEDSTART)
need_init = 1;
- if (get_rg_state_local(rg, &svcblk) < 0)
- continue;
+ if (!need_init) {
+ if (get_rg_state_local(rg, &svcblk) < 0)
+ continue;
+ } else {
+ if (rg_lock(rg, &lockp) != 0)
+ continue;
+
+ if (get_rg_state(rg, &svcblk) < 0) {
+ rg_unlock(&lockp);
+ continue;
+ }
+
+ rg_unlock(&lockp);
+ }
if (!need_init && svcblk.rs_owner != my_id())
continue;
--- cluster/rgmanager/src/daemons/main.c 2006/10/06 21:22:27 1.33
+++ cluster/rgmanager/src/daemons/main.c 2006/10/23 22:47:01 1.34
@@ -36,6 +36,7 @@
#include <malloc.h>
#include <cman-private.h>
+#define L_SHUTDOWN (1<<2)
#define L_SYS (1<<1)
#define L_USER (1<<0)
@@ -59,6 +60,7 @@
char debug = 0; /* XXX* */
static int signalled = 0;
static int port = RG_PORT;
+static char *rgmanager_lsname = "rgmanager"; /* XXX default */
int next_node_id(cluster_member_list_t *membership, int me);
int rg_event_q(char *svcName, uint32_t state, int owner);
@@ -479,7 +481,7 @@
/* No such service! */
swab_SmMessageSt(msg_sm);
- if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) !=
- sizeof (SmMessageSt))
+ if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) <
+ (int)sizeof (SmMessageSt)) /* int compare so a -1 error is caught */
clulog(LOG_ERR, "#40: Error replying to "
"action request.\n");
@@ -832,7 +834,7 @@
void *
shutdown_thread(void *arg)
{
- rg_lockall(L_SYS);
+ rg_lockall(L_SYS|L_SHUTDOWN);
rg_doall(RG_STOP_EXITING, 1, NULL);
running = 0;
@@ -886,7 +888,7 @@
return -1;
}
- if (clu_lock_init("rgmanager") != 0) {
+ if (clu_lock_init(rgmanager_lsname) != 0) {
printf("Locks not working!\n");
return -1;
}
@@ -982,6 +984,7 @@
cleanup(cluster_ctx);
clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
+ clu_lock_finished(rgmanager_lsname);
cman_finish(clu);
/*malloc_dump_table(); */ /* Only works if alloc.c us used */
--- cluster/rgmanager/src/daemons/rg_forward.c 2006/09/27 18:58:53 1.7
+++ cluster/rgmanager/src/daemons/rg_forward.c 2006/10/23 22:47:01 1.8
@@ -48,22 +48,15 @@
rg_state_t rgs;
request_t *req = (request_t *)arg;
struct dlm_lksb lockp;
- msgctx_t ctx;
+ msgctx_t *ctx = NULL;
SmMessageSt msg;
- if (rg_lock(req->rr_group, &lockp) != 0) {
- msg_close(req->rr_resp_ctx);
- msg_free_ctx(req->rr_resp_ctx);
- rq_free(req);
- pthread_exit(NULL);
- }
+ if (rg_lock(req->rr_group, &lockp) != 0)
+ goto out_fail;
if (get_rg_state(req->rr_group, &rgs) != 0) {
rg_unlock(&lockp);
- msg_close(req->rr_resp_ctx);
- msg_free_ctx(req->rr_resp_ctx);
- rq_free(req);
- pthread_exit(NULL);
+ goto out_fail;
}
rg_unlock(&lockp);
@@ -84,35 +77,32 @@
clulog(LOG_DEBUG, "Forwarding %s request to %d\n",
rg_req_str(req->rr_request), rgs.rs_owner);
- if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, &ctx, 10) < 0) {
- msg_close(req->rr_resp_ctx);
- msg_free_ctx(req->rr_resp_ctx);
- rq_free(req);
- pthread_exit(NULL);
- }
+ while ((ctx = msg_new_ctx()) == NULL)
+ sleep(1);
- if (msg_send(&ctx, &msg, sizeof(msg)) != sizeof(msg)) {
- msg_close(&ctx);
- msg_close(req->rr_resp_ctx);
- msg_free_ctx(req->rr_resp_ctx);
- rq_free(req);
- pthread_exit(NULL);
- }
+ if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0)
+ goto out_fail;
+ if (msg_send(ctx, &msg, sizeof(msg)) < sizeof(msg))
+ goto out_fail;
+ if (msg_receive(ctx, &msg, sizeof(msg), 600) < sizeof(msg))
+ goto out_fail;
- if (msg_receive(&ctx, &msg, sizeof(msg), 600) != sizeof(msg)) {
- msg_close(&ctx);
- msg_close(req->rr_resp_ctx);
- msg_free_ctx(req->rr_resp_ctx);
- rq_free(req);
- pthread_exit(NULL);
- }
- msg_close(&ctx);
+ msg_close(ctx);
+ msg_free_ctx(ctx);
swab_SmMessageSt(&msg);
send_response(msg.sm_data.d_ret, req);
-
rq_free(req);
-
+ pthread_exit(NULL);
+
+out_fail: /* Failure path */
+ if (ctx) {
+ msg_close(ctx);
+ msg_free_ctx(ctx);
+ }
+ msg_close(req->rr_resp_ctx);
+ msg_free_ctx(req->rr_resp_ctx);
+ rq_free(req);
pthread_exit(NULL);
}
--- cluster/rgmanager/src/daemons/rg_state.c 2006/09/28 20:01:56 1.23
+++ cluster/rgmanager/src/daemons/rg_state.c 2006/10/23 22:47:01 1.24
@@ -245,7 +245,7 @@
char res[256];
#ifndef OPENAIS
cluster_member_list_t *membership;
- int ret;
+ int ret, tries = 0;
#endif
if (name)
@@ -257,11 +257,16 @@
return -1;
return 0;
#else
- membership = member_list();
- ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
- sizeof(*svcblk));
- free_member_list(membership);
- return ret;
+ do {
+ /* Retry up to 3 times just in case members transition
+ while we're trying to commit something */
+ membership = member_list();
+ ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
+ sizeof(*svcblk));
+ free_member_list(membership);
+ } while (ret == VFR_TIMEOUT && ++tries < 3);
+
+ return (ret==VFR_OK?0:-1);
#endif
}
@@ -1193,7 +1198,7 @@
swab_SmMessageSt(&msg_relo);
/* Send relocate message to the other node */
- if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) !=
- sizeof (SmMessageSt)) {
+ if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) <
+ (int)sizeof (SmMessageSt)) { /* int compare so a -1 error is caught */
clulog(LOG_ERR,
"#59: Error sending relocate request to member #%d\n",
/cvs/cluster/cluster/rgmanager/src/resources/vm.sh,v --> standard output
revision 1.1
--- cluster/rgmanager/src/resources/vm.sh
+++ - 2006-10-23 22:47:04.631911000 +0000
@@ -0,0 +1,422 @@
+#!/bin/bash
+#
+# Copyright Red Hat Inc., 2005-2006
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+# MA 02139, USA.
+#
+
+# Run with a fixed search path made up of the system binary directories.
+export PATH=/bin:/sbin:/usr/bin:/usr/sbin
+
+#
+# Virtual Machine start/stop script (requires the xm command)
+#
+
+#
+# Print this agent's metadata on standard output: an XML
+# <resource-agent> document listing the parameters and actions
+# of the "vm" resource type.
+#
+meta_data()
+{
+ cat <<EOT
+<?xml version="1.0"?>
+<resource-agent version="rgmanager 2.0" name="vm">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ Defines a Virtual Machine
+ </longdesc>
+ <shortdesc lang="en">
+ Defines a Virtual Machine
+ </shortdesc>
+
+ <parameters>
+ <parameter name="name" primary="1">
+ <longdesc lang="en">
+ This is the name of the virtual machine.
+ </longdesc>
+ <shortdesc lang="en">
+ Name
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="domain">
+ <longdesc lang="en">
+ Fail over domains define lists of cluster members
+ to try in the event that the host of the virtual machine
+ fails.
+ </longdesc>
+ <shortdesc lang="en">
+ Cluster Fail Over Domain
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="autostart">
+ <longdesc lang="en">
+ If set to yes, this resource group will automatically be started
+ after the cluster forms a quorum. If set to no, this virtual
+ machine will start in the 'disabled' state after the cluster
+ forms a quorum.
+ </longdesc>
+ <shortdesc lang="en">
+ Automatic start after quorum formation
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
+ <parameter name="recovery" reconfig="1">
+ <longdesc lang="en">
+ This currently has three possible options: "restart" tries
+ to restart this virtual machine locally before
+ attempting to relocate (default); "relocate" does not bother
+ trying to restart the VM locally; "disable" disables
+ the VM if it fails.
+ </longdesc>
+ <shortdesc lang="en">
+ Failure recovery policy
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="memory" reconfig="1">
+ <longdesc lang="en">
+ Memory size. This can be reconfigured on the fly.
+ </longdesc>
+ <shortdesc lang="en">
+ Memory Size
+ </shortdesc>
+ <content type="integer"/>
+ </parameter>
+
+ <parameter name="bootloader">
+ <longdesc lang="en">
+ Boot loader that can start the VM from physical image
+ </longdesc>
+ <shortdesc lang="en">
+ Boot loader that can start the VM from physical image
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="path">
+ <longdesc lang="en">
+ Path specification 'xm create' will search for the specified
+ VM configuration file
+ </longdesc>
+ <shortdesc lang="en">
+ Path to virtual machine configuration files
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+
+ <parameter name="rootdisk_physical" unique="1">
+ <longdesc lang="en">
+ Root disk for the virtual machine. (physical, on the host)
+ </longdesc>
+ <shortdesc lang="en">
+ Root disk (physical)
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="rootdisk_virtual">
+ <longdesc lang="en">
+ Root disk for the virtual machine. (as presented to the VM)
+ </longdesc>
+ <shortdesc lang="en">
+ Root disk (virtual)
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+
+ <parameter name="swapdisk_physical" unique="1">
+ <longdesc lang="en">
+ Swap disk for the virtual machine. (physical, on the host)
+ </longdesc>
+ <shortdesc lang="en">
+ Swap disk (physical)
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="swapdisk_virtual">
+ <longdesc lang="en">
+ Swap disk for the virtual machine. (as presented to the VM)
+ </longdesc>
+ <shortdesc lang="en">
+ Swap disk (virtual)
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="vif">
+ <longdesc lang="en">
+ Virtual interface MAC address
+ </longdesc>
+ <shortdesc lang="en">
+ Virtual interface MAC address
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+
+ </parameters>
+
+ <actions>
+ <action name="start" timeout="20"/>
+ <action name="stop" timeout="120"/>
+
+ <!-- No-ops. Groups are abstract resource types. -->
+ <action name="status" timeout="10" interval="30m"/>
+ <action name="monitor" timeout="10" interval="30m"/>
+
+ <!-- reconfigure - reconfigure with new OCF parameters.
+ NOT OCF COMPATIBLE AT ALL -->
+ <action name="reconfig" timeout="10"/>
+
+ <!-- Suspend: if available, suspend this resource instead of
+ doing a full stop. -->
+ <!-- <action name="suspend" timeout="10m"/> -->
+ <action name="migrate" timeout="10m"/>
+
+ <action name="meta-data" timeout="5"/>
+ <action name="verify-all" timeout="5"/>
+
+ </actions>
+
+ <special tag="rgmanager">
+ <attributes maxinstances="1"/>
+ </special>
+</resource-agent>
+EOT
+}
+
+
+build_xm_cmdline()
+{
+ #
+ # Virtual domains should never restart themselves when
+ # controlled externally; the external monitoring app
+ # should.
+ #
+ declare cmdline="restart=\"never\""
+ declare varp val temp
+
+ #
+ # Transliterate the OCF_RESKEY_* to something the xm
+ # command can recognize.
+ #
+ for var in ${!OCF_RESKEY_*}; do
+ varp=${var/OCF_RESKEY_/}
+ val=`eval "echo \\$$var"`
+
+ case $varp in
+ bootloader)
+ cmdline="$cmdline bootloader=\"$val\""
+ ;;
+ rootdisk_physical)
+ [ -n "$OCF_RESKEY_rootdisk_virtual" ] || exit 2
+ cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_rootdisk_virtual,w\""
+ ;;
+ swapdisk_physical)
+ [ -n "$OCF_RESKEY_swapdisk_virtual" ] || exit 2
+ cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_swapdisk_virtual,w\""
+ ;;
+ vif)
+ cmdline="$cmdline vif=\"mac=$val\""
+ ;;
+ recovery|autostart|domain)
+ ;;
+ memory)
+ cmdline="$cmdline $varp=$val"
+ ;;
+ swapdisk_virtual)
+ ;;
+ rootdisk_virtual)
+ ;;
+ name) # Do nothing with name; add it later
+ ;;
+ path)
+ cmdline="$cmdline --path=\"$val\""
+ ;;
+ *)
+ cmdline="$cmdline $varp=\"$val\""
+ ;;
+ esac
+ done
+
+ if [ -n "$OCF_RESKEY_name" ]; then
+ cmdline="$OCF_RESKEY_name $cmdline"
+ fi
+
+ echo $cmdline
+}
+
+
+#
+# Start a virtual machine given the parameters from
+# the environment.
+#
+# Returns the exit status of 'xm create', or the failure status of
+# build_xm_cmdline when no command line could be built.
+#
+start()
+{
+    declare cmdline
+
+    # build_xm_cmdline runs in a subshell here, so its 'exit 2' does not
+    # stop this script; without the check, 'xm create' would be run with
+    # an empty argument list.
+    cmdline="$(build_xm_cmdline)" || return $?
+
+    echo "# xm command line: $cmdline"
+
+    # NOTE(review): eval re-parses the double quotes that
+    # build_xm_cmdline embeds around values.  It also executes any shell
+    # syntax present in the OCF_RESKEY_* values, so those must come from
+    # a trusted configuration.
+    eval xm create $cmdline
+    return $?
+}
+
+
+#
+# Stop a VM. Try to shut it down. Wait a bit, and if it
+# doesn't shut down, destroy it.
+#
+# Arguments: the xm sub-commands to try, in order (the dispatcher
+# passes "shutdown destroy" for stop and "destroy" for kill).
+# After each sub-command the VM is polled every 5 seconds for up to
+# 60 seconds.
+#
+# Returns 0 as soon as status() no longer finds the VM, 1 if it is
+# still present after every sub-command has been tried.
+#
+stop()
+{
+    declare -i timeout
+    declare op dom state
+
+    for op in "$@"; do
+        echo "xm $op $OCF_RESKEY_name ..."
+        xm "$op" "$OCF_RESKEY_name"
+
+        timeout=60
+        while [ $timeout -gt 0 ]; do
+            sleep 5
+            ((timeout -= 5))
+            status || return 0
+
+            #
+            # If xm list shows our domain in state "---s-"
+            # ("stopped"), kill it.
+            #
+            while read -r dom state; do
+                if [ "$dom" = "$OCF_RESKEY_name" ] &&
+                   [ "$state" = "---s-" ]; then
+                    xm destroy "$OCF_RESKEY_name"
+                fi
+            done < <(xm list | awk '{print $1, $5}')
+        done
+    done
+
+    return 1
+}
+
+
+#
+# Reconfigure a running VM. Currently, all we support is
+# memory ballooning.
+#
+# Returns 0 when no memory size is configured, otherwise the exit
+# status of 'xm balloon'.
+#
+reconfigure()
+{
+    # Nothing to change unless a memory size was supplied.
+    [ -n "$OCF_RESKEY_memory" ] || return 0
+
+    echo "xm balloon $OCF_RESKEY_name $OCF_RESKEY_memory"
+    # Quoted so each value reaches xm as exactly one argument.
+    xm balloon "$OCF_RESKEY_name" "$OCF_RESKEY_memory"
+    return $?
+}
+
+
+#
+# Simple status check: Find the VM in the list of running
+# VMs
+#
+# Returns the exit status of 'xm list <name>', or 1 when no VM name
+# is configured.
+#
+status()
+{
+    # With an empty name the command below would turn into a bare
+    # 'xm list', which presumably succeeds whenever the listing itself
+    # works and would make a nameless VM look like it is running.
+    [ -n "$OCF_RESKEY_name" ] || return 1
+
+    xm list "$OCF_RESKEY_name" &> /dev/null
+    return $?
+}
+
+
+#
+# Check the configured parameters for consistency.  Currently the
+# only check is that the boot loader, when one is set, is executable.
+#
+# Prints one line per problem found; returns 0 when there are none
+# and 1 otherwise.
+#
+verify_all()
+{
+    declare -i errors=0
+
+    if [ -n "$OCF_RESKEY_bootloader" ] && \
+       ! [ -x "$OCF_RESKEY_bootloader" ]; then
+        echo "$OCF_RESKEY_bootloader is not executable"
+        errors=$((errors + 1))
+    fi
+
+    # Derive the result from the counter explicitly rather than from
+    # whatever command happened to run last, so further checks can be
+    # added above without changing the outcome.
+    [ "$errors" -eq 0 ] || return 1
+    return 0
+}
+
+
+migrate()
+{
+ declare target=$1
+
+ # XXX TODO
+ return 1
+}
+
+#
+# Entry point: dispatch on the action name given as the first argument.
+# Actions backed by a function exit with that function's status;
+# recover, restart and reload are accepted and exit 0 without doing
+# anything; an unknown action prints the usage line and exits 1.
+#
+case $1 in
+    start)
+        start
+        exit $?
+        ;;
+    stop)
+        stop shutdown destroy
+        exit $?
+        ;;
+    kill)
+        stop destroy
+        exit $?
+        ;;
+    recover|restart)
+        exit 0
+        ;;
+    status|monitor)
+        status
+        exit $?
+        ;;
+    migrate)
+        migrate "$2" # Send VM to this node
+        exit $?
+        ;;
+    reload)
+        exit 0
+        ;;
+    reconfig)
+        echo "$0 RECONFIGURING $OCF_RESKEY_memory"
+        reconfigure
+        exit $?
+        ;;
+    meta-data)
+        meta_data
+        exit 0
+        ;;
+    verify-all)
+        verify_all
+        exit $?
+        ;;
+    *)
+        # List every action handled above.
+        echo "usage: $0 {start|stop|kill|recover|restart|status|monitor|migrate|reload|reconfig|meta-data|verify-all}"
+        exit 1
+        ;;
+esac
--- cluster/rgmanager/src/resources/Makefile 2006/07/12 15:43:56 1.12
+++ cluster/rgmanager/src/resources/Makefile 2006/10/23 22:47:01 1.13
@@ -18,7 +18,7 @@
INCLUDE += -I $(top_srcdir)/include
RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
- script.sh netfs.sh clusterfs.sh smb.sh xenvm.sh
+ script.sh netfs.sh clusterfs.sh smb.sh vm.sh
TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
reply other threads:[~2006-10-23 22:47 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061023224705.22916.qmail@sourceware.org \
--to=lhh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).