* [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
@ 2007-03-20 19:36 lhh
0 siblings, 0 replies; 4+ messages in thread
From: lhh @ 2007-03-20 19:36 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-03-20 19:36:15
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix #220211, pass 2: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
--- cluster/cman/man/qdisk.5 2007/02/21 20:19:46 1.1.2.4
+++ cluster/cman/man/qdisk.5 2007/03/20 19:36:14 1.1.2.5
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:19:43 1.1.2.7
+++ cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:19:43 1.1.2.3
+++ cluster/cman/qdisk/score.c 2007/03/20 19:36:14 1.1.2.4
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
@ 2007-03-20 19:37 lhh
0 siblings, 0 replies; 4+ messages in thread
From: lhh @ 2007-03-20 19:37 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-03-20 19:37:04
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix multimaster bug: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3.2.3&r2=1.3.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.5&r2=1.4.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.4.2&r2=1.2.4.3
--- cluster/cman/man/qdisk.5 2007/02/21 20:22:54 1.3.2.3
+++ cluster/cman/man/qdisk.5 2007/03/20 19:37:04 1.3.2.4
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:22:53 1.4.2.5
+++ cluster/cman/qdisk/main.c 2007/03/20 19:37:04 1.4.2.6
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:22:53 1.2.4.2
+++ cluster/cman/qdisk/score.c 2007/03/20 19:37:04 1.2.4.3
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
@ 2007-03-20 19:37 lhh
0 siblings, 0 replies; 4+ messages in thread
From: lhh @ 2007-03-20 19:37 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-03-20 19:37:25
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix multimaster bug: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&r1=1.4&r2=1.5
--- cluster/cman/man/qdisk.5 2007/02/21 20:24:30 1.6
+++ cluster/cman/man/qdisk.5 2007/03/20 19:37:24 1.7
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:24:30 1.9
+++ cluster/cman/qdisk/main.c 2007/03/20 19:37:24 1.10
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:24:30 1.4
+++ cluster/cman/qdisk/score.c 2007/03/20 19:37:24 1.5
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ...
@ 2007-03-20 19:41 lhh
0 siblings, 0 replies; 4+ messages in thread
From: lhh @ 2007-03-20 19:41 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: lhh at sourceware.org 2007-03-20 19:41:52
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix multimaster bug: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.4.3&r2=1.2.4.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.4&r2=1.3.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.3&r2=1.2.2.4
--- cluster/cman/man/qdisk.5 2007/02/21 20:25:09 1.2.4.3
+++ cluster/cman/man/qdisk.5 2007/03/20 19:41:52 1.2.4.4
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
@@ -314,6 +315,16 @@
disconnected from the SAN. The default for this value is 0 (off).
.in 9
+\fIuse_uptime\fP\fB="\fP1\fB"\fP
+.in 12
+If this parameter is set to 1 (on), qdiskd will use values from
+/proc/uptime for internal timings. This is a bit less precise
+than \fBgettimeofday(2)\fP, but the benefit is that changing the
+system clock will not affect qdiskd's behavior - even if \fBparanoid\fP
+is enabled. If set to 0, qdiskd will use \fBgettimeofday(2)\fP, which
+is more precise. The default for this value is 1 (on / use uptime).
+
+.in 9
\fIdevice\fP\fB="\fP/dev/sda1\fB"\fP
.in 12
This is the device the quorum daemon will use. This device must be the
@@ -432,4 +443,4 @@
for more details.
.SH "SEE ALSO"
-mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5)
+mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5), gettimeofday(2)
--- cluster/cman/qdisk/main.c 2007/02/21 20:25:09 1.3.2.4
+++ cluster/cman/qdisk/main.c 2007/03/20 19:41:52 1.3.2.5
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:25:09 1.2.2.3
+++ cluster/cman/qdisk/score.c 2007/03/20 19:41:52 1.2.2.4
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-03-20 19:41 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-20 19:36 [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc lhh
-- strict thread matches above, loose matches on Subject: below --
2007-03-20 19:37 lhh
2007-03-20 19:37 lhh
2007-03-20 19:41 lhh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).