From mboxrd@z Thu Jan 1 00:00:00 1970 From: Lon Hohberger Date: Wed, 10 Nov 2010 09:39:26 -0500 Subject: [Cluster-devel] [PATCH] qdiskd: (RHEL56) Don't write evictions if allow_kill is off Message-ID: <1289399966-17305-1-git-send-email-lhh@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Previously, qdisk master would write an eviction notice to disk for a hung qdisk node even if allow_kill was off, causing the other node to reboot. This patch causes the qdisk master to write S_NONE as the state of hung nodes on-disk when allow_kill is off instead of S_EVICT. So, when the node wakes up, it will read the S_NONE state and take action based on that state instead of reading S_EVICT and rebooting. Because there is so much internal qdiskd state which would need to be fixed on a node which is in this state (including rejoining the qdisk membership), the only clean method to continue operations is to restart qdiskd. Resolves: rhbz#602731 Signed-off-by: Lon Hohberger --- cman/qdisk/main.c | 80 +++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 62 insertions(+), 18 deletions(-) diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c index 153b190..1eb10a6 100644 --- a/cman/qdisk/main.c +++ b/cman/qdisk/main.c @@ -109,17 +109,36 @@ node_info_init(node_info_t *ni, int max) } +static void +reincarnate(void) +{ + char buf[PATH_MAX]; + char cmd[PATH_MAX] = { 0 }; /* zero-filled: readlink() does not NUL-terminate */ + /* Restart by re-executing our own binary (resolved via /proc/<pid>/exe); exits if that fails. */ + clulog(LOG_CRIT, "Attempting to restart\n"); + + snprintf(buf, sizeof(buf), "/proc/%d/exe", getpid()); + if (readlink(buf, cmd, sizeof(cmd) - 1) < 0) /* leave the last byte as the terminator */ + goto out_die; + + execlp(cmd, cmd, (char *)NULL); /* returns only on failure */ +out_die: + clulog(LOG_CRIT, "Unable to restart; dying.\n"); + exit(-1); +} + + /** Check to see if someone tried to evict us but we were out to lunch. Rare case; usually other nodes would put up the 'Undead' message and re-evict us. 
*/ -void +static int check_self(qd_ctx *ctx, status_block_t *sb) { if (!sb->ps_updatenode || (sb->ps_updatenode == ctx->qc_my_id)) { - return; + return 0; } /* I did not update this??! */ @@ -127,10 +146,16 @@ check_self(qd_ctx *ctx, status_block_t *sb) case S_EVICT: /* Someone told us to die. */ reboot(RB_AUTOBOOT); + case S_NONE: + return -1; default: - clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state); - raise(SIGSTOP); + break; } + + clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state); + raise(SIGSTOP); + + return -1; } @@ -160,9 +185,11 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max) swab_status_block_t(sb); if (sb->ps_nodeid == ctx->qc_my_id) { - check_self(ctx, sb); + if (check_self(ctx, sb) < 0) + reincarnate(); continue; } + /* message. */ memcpy(&(ni[x].ni_last_msg), &(ni[x].ni_msg), sizeof(ni[x].ni_last_msg)); @@ -278,17 +305,26 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask) Write eviction notice if we're the master. */ if (ctx->qc_status == S_MASTER) { - clulog(LOG_NOTICE, - "Writing eviction notice for node %d\n", - ni[x].ni_status.ps_nodeid); - qd_write_status(ctx, ni[x].ni_status.ps_nodeid, - S_EVICT, NULL, NULL, NULL); + if (ctx->qc_flags & RF_ALLOW_KILL) { + clulog(LOG_NOTICE, + "Writing eviction notice for node %d\n", + ni[x].ni_status.ps_nodeid); + qd_write_status(ctx, ni[x].ni_status.ps_nodeid, + S_EVICT, NULL, NULL, NULL); clulog(LOG_DEBUG, "Telling CMAN to " "kill the node\n"); cman_kill_node(ctx->qc_ch, ni[x].ni_status.ps_nodeid); + } else { + clulog(LOG_NOTICE, + "Node %d should be evicted, but " + "allow_kill is off\n", + ni[x].ni_status.ps_nodeid); + qd_write_status(ctx, ni[x].ni_status.ps_nodeid, + S_NONE, NULL, NULL, NULL); } + } /* Clear our master mask for the node after eviction */ @@ -313,20 +349,28 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask) clulog(LOG_CRIT, "Node %d is undead.\n", ni[x].ni_status.ps_nodeid); - clulog(LOG_ALERT, - "Writing 
eviction notice (again) for node %d\n", - ni[x].ni_status.ps_nodeid); - qd_write_status(ctx, ni[x].ni_status.ps_nodeid, - S_EVICT, NULL, NULL, NULL); - ni[x].ni_status.ps_state = S_EVICT; - - /* XXX Need to fence it again */ if (ctx->qc_flags & RF_ALLOW_KILL) { + clulog(LOG_ALERT, + "Writing eviction notice (again) for node %d\n", + ni[x].ni_status.ps_nodeid); + qd_write_status(ctx, ni[x].ni_status.ps_nodeid, + S_EVICT, NULL, NULL, NULL); + ni[x].ni_status.ps_state = S_EVICT; + + /* XXX Need to fence it again */ clulog(LOG_DEBUG, "Telling CMAN to " "kill the node\n"); cman_kill_node(ctx->qc_ch, ni[x].ni_status.ps_nodeid); + } else { + /* administrator doesn't care */ + clulog(LOG_DEBUG, + "Ignoring zombie node %d since " + "allow_kill is off\n", + ni[x].ni_status.ps_nodeid); + ni[x].ni_evil_incarnation = 0; } + continue; } -- 1.7.2.3