cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
* [Cluster-devel] [PATCH 1/2] rgmanager: Do hard shut down if CMAN dies
@ 2010-10-28 21:17 Lon Hohberger
  2010-10-28 21:17 ` [Cluster-devel] [PATCH 2/2] rgmanager: Work around lockspace release hang Lon Hohberger
  0 siblings, 1 reply; 3+ messages in thread
From: Lon Hohberger @ 2010-10-28 21:17 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Resolves: rhbz#639961

Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
 rgmanager/src/clulib/msg_cluster.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/rgmanager/src/clulib/msg_cluster.c b/rgmanager/src/clulib/msg_cluster.c
index 4ec3750..8dc22d0 100644
--- a/rgmanager/src/clulib/msg_cluster.c
+++ b/rgmanager/src/clulib/msg_cluster.c
@@ -35,6 +35,8 @@
 #include <cman-private.h>
 #include <clulog.h>
 
+static void process_cman_event(cman_handle_t handle, void *private,
+			       int reason, int arg);
 /* Ripped from ccsd's setup_local_socket */
 
 int cluster_msg_close(msgctx_t *ctx);
@@ -207,7 +209,10 @@ poll_cluster_messages(int timeout)
 			return -1;
 		}
 
-		cman_dispatch(ch, 0);
+		if (cman_dispatch(ch, 0) < 0) {
+			process_cman_event(ch, NULL,
+					   CMAN_REASON_TRY_SHUTDOWN, 0);
+		}
 		ret = 0;
 	}
 	cman_unlock(ch);
-- 
1.7.2.3



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH 2/2] rgmanager: Work around lockspace release hang
  2010-10-28 21:17 [Cluster-devel] [PATCH 1/2] rgmanager: Do hard shut down if CMAN dies Lon Hohberger
@ 2010-10-28 21:17 ` Lon Hohberger
  2010-10-29  6:46   ` Fabio M. Di Nitto
  0 siblings, 1 reply; 3+ messages in thread
From: Lon Hohberger @ 2010-10-28 21:17 UTC (permalink / raw)
  To: cluster-devel.redhat.com

If CMAN dies uncleanly (e.g., because of a cman_kill_node() call
on another cluster node), rgmanager would hang trying to
release the lockspace, preventing it from exiting and causing
it to spin.

This patch works around the hang during unclean shutdown
situations.

Resolves: rhbz#639961

Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
 rgmanager/src/daemons/main.c |   11 ++++++++---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 64c32a3..52e38bc 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -64,7 +64,7 @@ int node_has_fencing(int nodeid);
 int fence_domain_joined(void);
 
 int cluster_timeout = 10;
-int shutdown_pending = 0, running = 1, need_reconfigure = 0;
+int shutdown_pending = 0, running = 1, need_reconfigure = 0, dying = 0;
 char debug = 0; /* XXX* */
 static int signalled = 0;
 static int port = RG_PORT;
@@ -676,12 +676,14 @@ handle_cluster_event(msgctx_t *ctx)
 		msg_receive(ctx, NULL, 0, 0);
 		clulog(LOG_WARNING, "#67: Shutting down uncleanly\n");
 		rg_set_inquorate();
-		rg_doall(RG_INIT, 1, "Emergency stop of %s");
+		rg_doall(RG_INIT, 1, "Emergency stop of %s\n");
 		rg_clear_initialized(0);
 #if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
 		/* cman_replyto_shutdown() */
 #endif
 		running = 0;
+		dying = 1; /* XXX Hack to work around hang during
+		              unclean lockspace release */
 		break;
 	}
 
@@ -1180,7 +1182,10 @@ main(int argc, char **argv)
 		cleanup(cluster_ctx);
 
 out_cleanup:
-	clu_lock_finished(rgmanager_lsname);
+	/* XXX - This hangs if CMAN has died, so we skip if we are
+	 *       exiting uncleanly. */
+	if (!dying)
+		clu_lock_finished(rgmanager_lsname);
 
 out:
 	clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
-- 
1.7.2.3



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH 2/2] rgmanager: Work around lockspace release hang
  2010-10-28 21:17 ` [Cluster-devel] [PATCH 2/2] rgmanager: Work around lockspace release hang Lon Hohberger
@ 2010-10-29  6:46   ` Fabio M. Di Nitto
  0 siblings, 0 replies; 3+ messages in thread
From: Fabio M. Di Nitto @ 2010-10-29  6:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com


ACK on both patches.

Fabio

On 10/28/2010 11:17 PM, Lon Hohberger wrote:
> If CMAN dies uncleanly (e.g., because of a cman_kill_node() call
> on another cluster node), rgmanager would hang trying to
> release the lockspace, preventing it from exiting and causing
> it to spin.
> 
> This patch works around the hang during unclean shutdown
> situations.
> 
> Resolves: rhbz#639961
> 
> Signed-off-by: Lon Hohberger <lhh@redhat.com>
> ---
>  rgmanager/src/daemons/main.c |   11 ++++++++---
>  1 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
> index 64c32a3..52e38bc 100644
> --- a/rgmanager/src/daemons/main.c
> +++ b/rgmanager/src/daemons/main.c
> @@ -64,7 +64,7 @@ int node_has_fencing(int nodeid);
>  int fence_domain_joined(void);
>  
>  int cluster_timeout = 10;
> -int shutdown_pending = 0, running = 1, need_reconfigure = 0;
> +int shutdown_pending = 0, running = 1, need_reconfigure = 0, dying = 0;
>  char debug = 0; /* XXX* */
>  static int signalled = 0;
>  static int port = RG_PORT;
> @@ -676,12 +676,14 @@ handle_cluster_event(msgctx_t *ctx)
>  		msg_receive(ctx, NULL, 0, 0);
>  		clulog(LOG_WARNING, "#67: Shutting down uncleanly\n");
>  		rg_set_inquorate();
> -		rg_doall(RG_INIT, 1, "Emergency stop of %s");
> +		rg_doall(RG_INIT, 1, "Emergency stop of %s\n");
>  		rg_clear_initialized(0);
>  #if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
>  		/* cman_replyto_shutdown() */
>  #endif
>  		running = 0;
> +		dying = 1; /* XXX Hack to work around hang during
> +		              unclean lockspace release */
>  		break;
>  	}
>  
> @@ -1180,7 +1182,10 @@ main(int argc, char **argv)
>  		cleanup(cluster_ctx);
>  
>  out_cleanup:
> -	clu_lock_finished(rgmanager_lsname);
> +	/* XXX - This hangs if CMAN has died, so we skip if we are
> +	 *       exiting uncleanly. */
> +	if (!dying)
> +		clu_lock_finished(rgmanager_lsname);
>  
>  out:
>  	clulog(LOG_NOTICE, "Shutdown complete, exiting\n");



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-10-29  6:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-28 21:17 [Cluster-devel] [PATCH 1/2] rgmanager: Do hard shut down if CMAN dies Lon Hohberger
2010-10-28 21:17 ` [Cluster-devel] [PATCH 2/2] rgmanager: Work around lockspace release hang Lon Hohberger
2010-10-29  6:46   ` Fabio M. Di Nitto

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).