From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 14 Aug 2006 21:01:54 -0000 Subject: [Cluster-devel] cluster/group/daemon app.c Message-ID: <20060814210154.16176.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-08-14 21:01:54 Modified files: group/daemon : app.c Log message: Code that starts groups in order of level during recovery wasn't working right in the case where a node fails while mounting, i.e. node fails after it's joined the level2 mountgroup but before it joins the level1 lockspace. Code now checks that all lower levels are recovered instead of just checking that level-1 is recovered. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.39&r2=1.40 --- cluster/group/daemon/app.c 2006/08/02 20:30:35 1.39 +++ cluster/group/daemon/app.c 2006/08/14 21:01:54 1.40 @@ -379,19 +379,6 @@ return 1; } -static int level_is_recovered(struct recovery_set *rs, int level) -{ - struct recovery_entry *re; - - list_for_each_entry(re, &rs->entries, list) { - if (re->group->level != level) - continue; - if (!re->recovered) - return 0; - } - return 1; -} - void dump_recovery_sets(void) { struct recovery_set *rs; @@ -407,33 +394,47 @@ } } -/* lower level group should be recovered in each recovery set */ - -static int lower_level_recovered(group_t *g) +static int group_in_recovery_set(struct recovery_set *rs, group_t *g) { - struct recovery_set *rs; struct recovery_entry *re; - int found = 0; list_for_each_entry(rs, &recovery_sets, list) { list_for_each_entry(re, &rs->entries, list) { - if (re->group == g) { - found = 1; - if (level_is_recovered(rs, g->level - 1)) - break; - else { - log_group(g, "lower level %d is not " - "recovered in rs %d", - g->level - 1, rs->nodeid); - /* dump_recovery_sets(); */ - return 0; - } - } + if (re->group == g) + return 1; } } + return 0; +} + +static int rs_lower_levels_recovered(struct recovery_set *rs, int level) +{ + struct recovery_entry *re; - if (!found) + list_for_each_entry(re, &rs->entries, list) { + if (re->group->level < level && !re->recovered) + return 0; + } + return 1; +} + +/* lower level groups should be recovered in each rs this group is in */ + +static int lower_levels_recovered(group_t *g) +{ + struct recovery_set *rs; + + list_for_each_entry(rs, &recovery_sets, list) { + if (!group_in_recovery_set(rs, g)) + continue; + + if (rs_lower_levels_recovered(rs, g->level)) + continue; + + log_group(g, "lower levels not recovered in rs %d", rs->nodeid); return 0; + } + return 1; } @@ -453,24 +454,14 @@ static int lowest_level(group_t *g) { struct recovery_set *rs; - struct recovery_entry *re; - int found = 0; list_for_each_entry(rs, &recovery_sets, list) { - list_for_each_entry(re, &rs->entries, list) { - if (re->group == g) { - found = 1; - if (level_is_lowest(rs, g->level)) - break; - else - return 0; - } - - } - } - - if (!found) + if (!group_in_recovery_set(rs, g)) + continue; + if (level_is_lowest(rs, g->level)) + continue; return 0; + } return 1; } @@ -1006,11 +997,11 @@ } else log_group(g, "wait for all_levels_all_stopped"); } else { - if (lower_level_recovered(g)) { + if (lower_levels_recovered(g)) { ev->state = EST_FAIL_START_WAIT; do_start = 1; } else - log_group(g, "wait for lower_level_recovered"); + log_group(g, "wait for lower_levels_recovered"); } if (!do_start)