From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 20 Mar 2007 19:41:53 -0000 Subject: [Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/sc ... Message-ID: <20070320194153.29107.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: STABLE Changes by: lhh at sourceware.org 2007-03-20 19:41:52 Modified files: cman/man : qdisk.5 cman/qdisk : main.c score.c Log message: Fix multimaster bug: ensure timings are accurate and provide multi-master conflict resolution Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.4.3&r2=1.2.4.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.4&r2=1.3.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.3&r2=1.2.2.4 --- cluster/cman/man/qdisk.5 2007/02/21 20:25:09 1.2.4.3 +++ cluster/cman/man/qdisk.5 2007/03/20 19:41:52 1.2.4.4 @@ -216,7 +216,7 @@ \fItko_up\fP\fB="\fPX\fB"\fP .in 12 This is the number of cycles a node must be seen in order to be declared -online. Default is \fBfloor(tko/2)\fP. +online. Default is \fBfloor(tko/3)\fP. .in 9 \fIupgrade_wait\fP\fB="\fP2\fB"\fP @@ -229,8 +229,9 @@ \fImaster_wait\fP\fB="\fPX\fB"\fP .in 12 This is the number of cycles a node must wait for votes before declaring -itself master after making a bid. Default is \fBfloor(tko/3)\fP. -This can not be less than 2 and should not exceed \fBtko\fP. +itself master after making a bid. Default is \fBfloor(tko/2)\fP. +This can not be less than 2, must be greater than tko_up, and should not +exceed \fBtko\fP. .in 9 \fIvotes\fP\fB="\fP3\fB"\fP @@ -314,6 +315,16 @@ disconnected from the SAN. The default for this value is 0 (off). .in 9 +\fIuse_uptime\fP\fB="\fP1\fB"\fP +.in 12 +If this parameter is set to 1 (on), qdiskd will use values from +/proc/uptime for internal timings. This is a bit less precise +than \fBgettimeofday(2)\fP, but the benefit is that changing the +system clock will not affect qdiskd's behavior - even if \fBparanoid\fP +is enabled. If set to 0, qdiskd will use \fBgettimeofday(2)\fP, which +is more precise. The default for this value is 1 (on / use uptime). + +.in 9 \fIdevice\fP\fB="\fP/dev/sda1\fB"\fP .in 12 This is the device the quorum daemon will use. This device must be the @@ -432,4 +443,4 @@ for more details. .SH "SEE ALSO" -mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5) +mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5), gettimeofday(2) --- cluster/cman/qdisk/main.c 2007/02/21 20:25:09 1.3.2.4 +++ cluster/cman/qdisk/main.c 2007/03/20 19:41:52 1.3.2.5 @@ -381,22 +381,26 @@ Returns */ int -master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id) +master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count) { int x; int masters = 0; int ret = 0; + if (count) + *count = 0; *low_id = ctx->qc_my_id; for (x = 0; x < max; x++) { /* See if this one's a master */ if (ni[x].ni_state >= S_RUN && - ni[x].ni_status.ps_state == S_MASTER) { + ni[x].ni_status.ps_state == S_MASTER && + ni[x].ni_status.ps_nodeid != ctx->qc_my_id) { if (!ret) ret = ni[x].ni_status.ps_nodeid; ++masters; + continue; } /* See if it's us... */ @@ -424,11 +428,8 @@ *low_id = ni[x].ni_status.ps_nodeid; } - if (masters > 1) { - clulog(LOG_CRIT, - "Critical Error: More than one master found!\n"); - /* XXX Handle this how? */ - } + if (count) + *count = masters; /* else if (masters == 1) { printf("Node %d is the master\n", ret); @@ -849,7 +850,7 @@ { disk_msg_t msg = {0, 0, 0}; int low_id, bid_pending = 0, score, score_max, score_req, - upgrade = 0; + upgrade = 0, count; memb_mask_t mask, master_mask; struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval; @@ -921,11 +922,26 @@ score, score_max, score_req); ctx->qc_status = S_RUN; upgrade = ctx->qc_upgrade_wait; + bid_pending = 0; + msg.m_msg = M_NONE; + ++msg.m_seq; } } /* Find master */ - ctx->qc_master = master_exists(ctx, ni, max, &low_id); + ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count); + + /* Resolve master conflict, if one exists */ + if (count > 1 && ctx->qc_status == S_MASTER) { + clulog(LOG_WARNING, "Master conflict: abdicating\n"); + + /* Handle just like a recent upgrade */ + ctx->qc_status = S_RUN; + upgrade = ctx->qc_upgrade_wait; + bid_pending = 0; + msg.m_msg = M_NONE; + ++msg.m_seq; + } /* Figure out what to do based on what we know */ if (!ctx->qc_master && @@ -1163,7 +1179,7 @@ } /* Get up-tko (transition off->online) */ - ctx->qc_tko_up = (ctx->qc_tko / 2); + ctx->qc_tko_up = (ctx->qc_tko / 3); snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up"); if (ccs_get(ccsfd, query, &val) == 0) { ctx->qc_tko_up = atoi(val); @@ -1185,14 +1201,14 @@ /* wait this many intervals after bidding for master before becoming Caesar */ - ctx->qc_master_wait = (ctx->qc_tko / 3); + ctx->qc_master_wait = (ctx->qc_tko / 2); snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait"); if (ccs_get(ccsfd, query, &val) == 0) { ctx->qc_master_wait = atoi(val); free(val); } - if (ctx->qc_master_wait < 2) - ctx->qc_master_wait = 2; + if (ctx->qc_master_wait <= ctx->qc_tko_up) + ctx->qc_master_wait = ctx->qc_tko_up + 1; /* Get votes */ snprintf(query, sizeof(query), "/cluster/quorumd/@votes"); --- cluster/cman/qdisk/score.c 2007/02/21 20:25:09 1.2.2.3 +++ cluster/cman/qdisk/score.c 2007/03/20 19:41:52 1.2.2.4 @@ -143,7 +143,7 @@ *score = 0; *maxscore = 0; - printf("max = %d\n", max); + //printf("max = %d\n", max); /* Allow operation w/o any heuristics */ if (!max) { *score = *maxscore = 1;