From mboxrd@z Thu Jan 1 00:00:00 1970 From: Lon Hohberger Date: Wed, 2 Dec 2009 16:24:53 -0500 Subject: [Cluster-devel] [PATCH] rgmanager: Fix relocation & migration errors Message-ID: <1259789093-17628-1-git-send-email-lhh@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit If you relocated a service but it ended up on the same node, the error message was "Failure". While this is technically correct because the relocation failed, there is no reason not to have a distinct error indicating that the service is still running. Furthermore, if a migration hit a non-critical failure that caused the migration to fail but left the virtual machine running on the original owner, there was no way to detect this particular condition. Signed-off-by: Lon Hohberger --- rgmanager/include/resgroup.h | 2 ++ rgmanager/src/clulib/rg_strings.c | 1 + rgmanager/src/daemons/rg_state.c | 5 ++++- rgmanager/src/resources/vm.sh | 6 +++++- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h index 5dc84d4..7011a0c 100644 --- a/rgmanager/include/resgroup.h +++ b/rgmanager/include/resgroup.h @@ -196,6 +196,8 @@ int rg_unlock(struct dlm_lksb *p); /* Return codes */ +#define RG_ERELO -17 /* Relocation failure; service running + on original node */ #define RG_EEXCL -16 /* Service not runnable due to inability to start exclusively */ #define RG_EDOMAIN -15 /* Service not runnable given the diff --git a/rgmanager/src/clulib/rg_strings.c b/rgmanager/src/clulib/rg_strings.c index 35c570d..cc34a9f 100644 --- a/rgmanager/src/clulib/rg_strings.c +++ b/rgmanager/src/clulib/rg_strings.c @@ -8,6 +8,7 @@ struct string_val { const struct string_val rg_error_strings[] = { + { RG_ERELO, "Failed; service running on original owner" }, { RG_EDOMAIN, "Service not runnable" }, { RG_ESCRIPT, "S/Lang Script Error" }, { 
RG_EFENCE, "Fencing operation pending; try again later" }, diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c index 2346c44..6f80047 100644 --- a/rgmanager/src/daemons/rg_state.c +++ b/rgmanager/src/daemons/rg_state.c @@ -903,6 +903,9 @@ svc_migrate(const char *svcName, int target) return RG_EFAIL; case OCF_RA_NOT_CONFIGURED: return RG_EINVAL; + case 150: /* see vm.sh */ + /* Migration failed; VM still running on source node */ + return RG_ERELO; case 0: break; } @@ -1847,7 +1850,7 @@ exhausted: svcName); if (svc_start(svcName, RG_START_RECOVER) == 0) { *new_owner = me; - return RG_EFAIL; + return RG_ERELO; } } diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh index 3ec8b87..0b5f4b1 100644 --- a/rgmanager/src/resources/vm.sh +++ b/rgmanager/src/resources/vm.sh @@ -934,9 +934,13 @@ case $1 in # If the VM is still in good health, return # a value to rgmanager to indicate the # non-critical error + # + # OCF states that codes 150-199 are reserved + # for application use, so we'll use 150 + # do_status > /dev/null if [ $? -eq 0 ]; then - rv=$OCF_NOT_RUNNING + rv=150 fi fi exit $rv -- 1.6.2.5