From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
Date: 20 Mar 2007 19:36:15 -0000 [thread overview]
Message-ID: <20070320193615.25086.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-03-20 19:36:15
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix #220211, pass 2: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
--- cluster/cman/man/qdisk.5 2007/02/21 20:19:46 1.1.2.4
+++ cluster/cman/man/qdisk.5 2007/03/20 19:36:14 1.1.2.5
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This cannot be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:19:43 1.1.2.7
+++ cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:19:43 1.1.2.3
+++ cluster/cman/qdisk/score.c 2007/03/20 19:36:14 1.1.2.4
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
next reply other threads:[~2007-03-20 19:36 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-03-20 19:36 lhh [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-03-20 19:37 [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc lhh
2007-03-20 19:37 lhh
2007-03-20 19:41 lhh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070320193615.25086.qmail@sourceware.org \
--to=lhh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.