From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 4 Apr 2007 21:36:02 -0000 Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-server.c Message-ID: <20070404213602.29783.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL45 Changes by: jbrassow at sourceware.org 2007-04-04 22:36:02 Modified files: cmirror-kernel/src: dm-cmirror-server.c Log message: Bug 235252: cmirror synchronization deadlocked waiting for response fro... Moved the check for recovery/write conflict to flush from mark_region to avoid potential conflicts that were causing writes to indefinitly hang on failure conditions. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26.2.2&r2=1.1.2.26.2.3 --- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/03 18:23:01 1.1.2.26.2.2 +++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/04 21:36:01 1.1.2.26.2.3 @@ -223,10 +223,13 @@ return count; } +struct region_user *find_ru_by_region(struct log_c *lc, region_t region); static int _core_get_resync_work(struct log_c *lc, region_t *region) { + int sync_search, conflict = 0; + if (lc->recovering_region != (uint64_t)-1) { - DMDEBUG("Someone is already recovering (%Lu)", lc->recovering_region); + DMDEBUG("Someone is already recovering region %Lu", lc->recovering_region); return 0; } @@ -242,16 +245,27 @@ return 0; } } - *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits, - lc->region_count, - lc->sync_search); - lc->sync_search = *region + 1; + for (sync_search = lc->sync_search; + sync_search < lc->region_count; + sync_search = (*region + 1)) { + *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits, + lc->region_count, + sync_search); + if (find_ru_by_region(lc, *region)) { + conflict = 1; + DMDEBUG("Recovery blocked by outstanding write on region %Lu", + *region); + } else { + break; + } + } + if (!conflict) + lc->sync_search = *region + 1; if (*region >= lc->region_count) return 0; lc->recovering_region = *region; - DMDEBUG("Assigning recovery work: %Lu", *region); return 1; } @@ -374,6 +388,8 @@ bad_count++; log_clear_bit(lc, lc->sync_bits, ru->ru_region); if (ru->ru_rw == RU_RECOVER) { + DMINFO("Failed node was recovering region %Lu - cleared", + ru->ru_region); lc->recovering_region = (uint64_t)-1; } list_del(&ru->ru_list); @@ -523,14 +539,19 @@ log_clear_bit(lc, lc->clean_bits, lr->u.lr_region); list_add(&new->ru_list, &lc->region_users); } else if (ru->ru_rw == RU_RECOVER) { + /* + * The flush will block if a write conflicts with a + * recovering region. In the meantime, we add this + * entry to the tail of the list so the recovery + * gets cleared first. + */ DMDEBUG("Attempt to mark a region " SECTOR_FORMAT "/%s which is being recovered.", lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8)); DMDEBUG("Current recoverer: %u", ru->ru_nodeid); DMDEBUG("Mark requester : %u", who); - - mempool_free(new, region_user_pool); - return -EBUSY; + log_clear_bit(lc, lc->clean_bits, lr->u.lr_region); + list_add_tail(&new->ru_list, &lc->region_users); } else if (!find_ru(lc, who, lr->u.lr_region)) { list_add(&new->ru_list, &ru->ru_list); } else { @@ -569,6 +590,34 @@ static int server_flush(struct log_c *lc) { int r = 0; + int count = 0; + struct region_user *ru, *ru2; + + if (lc->recovering_region != (uint64_t)-1) { + list_for_each_entry(ru, &lc->region_users, ru_list) + if (ru->ru_region == lc->recovering_region) + count++; + + if (count > 1) { + list_for_each_entry(ru, &lc->region_users, ru_list) + if (ru->ru_rw == RU_RECOVER) + break; + + DMDEBUG("Flush includes region which is being recovered (%u/%Lu). Delaying...", + ru->ru_nodeid, ru->ru_region); + DMDEBUG("Recovering region: %Lu", lc->recovering_region); + DMDEBUG(" sync_bit: %s, clean_bit: %s", + log_test_bit(lc->sync_bits, lc->recovering_region) ? "set" : "unset", + log_test_bit(lc->clean_bits, lc->recovering_region) ? "set" : "unset"); + + list_for_each_entry(ru2, &lc->region_users, ru_list) + if (ru->ru_region == ru2->ru_region) + DMDEBUG(" %s", (ru2->ru_rw == RU_RECOVER) ? "recover" : + (ru2->ru_rw == RU_WRITE) ? "writer" : "unknown"); + + return -EBUSY; + } + } r = write_bits(lc); if (!r) { @@ -597,6 +646,7 @@ new->ru_region = lr->u.lr_region_rtn; new->ru_rw = RU_RECOVER; list_add(&new->ru_list, &lc->region_users); + DMDEBUG("Assigning recovery work to %u: %Lu", who, new->ru_region); } else { mempool_free(new, region_user_pool); } @@ -624,6 +674,9 @@ log_set_bit(lc, lc->sync_bits, lr->u.lr_region); lc->sync_count++; } + lc->sync_pass = 0; + + DMDEBUG("Resync work completed: %Lu", lr->u.lr_region); } else if (log_test_bit(lc->sync_bits, lr->u.lr_region)) { /* gone again: lc->sync_count--;*/ log_clear_bit(lc, lc->sync_bits, lr->u.lr_region);