From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
Date: 20 Mar 2007 19:37:26 -0000 [thread overview]
Message-ID: <20070320193726.25629.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-03-20 19:37:25
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix multimaster bug: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&r1=1.4&r2=1.5
--- cluster/cman/man/qdisk.5 2007/02/21 20:24:30 1.6
+++ cluster/cman/man/qdisk.5 2007/03/20 19:37:24 1.7
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:24:30 1.9
+++ cluster/cman/qdisk/main.c 2007/03/20 19:37:24 1.10
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:24:30 1.4
+++ cluster/cman/qdisk/score.c 2007/03/20 19:37:24 1.5
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
next reply other threads:[~2007-03-20 19:37 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-03-20 19:37 lhh [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-03-20 19:41 [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc lhh
2007-03-20 19:37 lhh
2007-03-20 19:36 lhh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070320193726.25629.qmail@sourceware.org \
--to=lhh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).