From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 5 Apr 2007 21:33:37 -0000 [thread overview]
Message-ID: <20070405213337.30913.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL45
Changes by: jbrassow at sourceware.org 2007-04-05 22:33:36
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
Log message:
Bug 234918 Processed: NMI Watchdog detected LOCKUP while running proces...
Bug 217438: scrolling kernel requests to mark mirror regions
Item 1:
I needed to check for marked regions when getting resync work, not
just check for resyncing regions when a mark/flush happens.
Item 2:
There is a corner case that allows two calls to clear the same
region. The second does not need to be logged.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.41.2.2&r2=1.1.2.41.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26.2.3&r2=1.1.2.26.2.4
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/03 18:23:01 1.1.2.41.2.2
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/05 21:33:36 1.1.2.41.2.3
@@ -1034,7 +1034,9 @@
spin_lock(&lc->state_lock);
- /* Should find match in this list, or no lists at all */
+ /*
+ * The nominal case is to find the region in the marked list
+ */
list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list){
if(region == rs->rs_region){
list_del_init(&rs->rs_list);
@@ -1043,28 +1045,46 @@
}
}
-
- list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
+ /*
+ * It is possible, but unlikely, to get to this case. It requires
+ * the following to happen:
+ * 1) mark the region for writing
+ * 2) clear the region
+ * 3) clear doesn't get flushed because of bug 235040
+ * 4) suspend due to server relocation
+ * 5) on-disk log says we need to recover (because it hasn't been cleared)
+ * 6) we recover the region
+ * 7) clearing the region after recovery causes us to get here
+ *
+ * Once 235040 is cleared, any entries found in this list should
+ * cause a bug.
+ */
+ list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
if(region == rs->rs_region){
- DMERR("Clear pre-empting mark (%Lu/%s)",
- region, lc->uuid + (strlen(lc->uuid) - 8));
- BUG();
+ DMERR("%d) Double clear on region ("
+ SECTOR_FORMAT ")", __LINE__, region);
+ goto out;
}
}
- list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
+ list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
if(region == rs->rs_region){
- DMERR("%d) Double clear on region ("
- SECTOR_FORMAT ")", __LINE__, region);
+ DMERR("Clear pre-empting mark (%Lu/%s)",
+ region, lc->uuid + (strlen(lc->uuid) - 8));
BUG();
}
}
+
/* We can get here because we may be doing resync_work, and therefore,**
** clearing without ever marking..................................... */
/* Don't need to spin_unlock, because allocation is non-blocking */
rs_new = mempool_alloc(region_state_pool, GFP_ATOMIC);
- BUG_ON(!rs_new);
+ if (!rs_new) {
+ DMERR("Failed to allocate space for clear region request: %Lu",
+ region);
+ BUG();
+ }
memset(rs_new, 0, sizeof(struct region_state));
rs_new->rs_region = region;
@@ -1088,6 +1108,21 @@
DMWARN("Error while getting resync work: bad region");
rtn = 0;
}
+
+ /*
+ * Check for bug 235039
+ * Note the changes in cluster_clear_region
+ */
+ if (rtn == 1) {
+ struct region_state *rs, *tmp_rs;
+ list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
+ if (*region == rs->rs_region) {
+ DMERR("WARNING: Bug 235039/235040 detected!");
+ DMERR("Work-around in place.");
+ }
+ }
+ }
+
return rtn;
}
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/04 21:36:01 1.1.2.26.2.3
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/05 21:33:36 1.1.2.26.2.4
@@ -656,6 +656,8 @@
static int server_complete_resync_work(struct log_c *lc, struct log_request *lr, int success){
+ struct region_user *ru;
+
if (lr->u.lr_region > lc->region_count) {
return -EINVAL;
}
@@ -678,6 +680,42 @@
DMDEBUG("Resync work completed: %Lu", lr->u.lr_region);
} else if (log_test_bit(lc->sync_bits, lr->u.lr_region)) {
+ ru = find_ru_by_region(lc, lr->u.lr_region);
+
+ /*
+ * The following condition can never happen unless we have
+ * a corrupted list or we had a communication error.
+ *
+ * If a write failed to one of the mirror devices, the ru
+ * should be RU_WRITE. If a recovery failed, it should be
+ * RU_RECOVER
+ */
+ if (!ru) {
+ DMERR("Unable to find region being marked out-of-sync: %Lu",
+ lr->u.lr_region);
+ return -EINVAL;
+ }
+
+ if (ru->ru_rw == RU_RECOVER) {
+ if (lr->u.lr_region != lc->recovering_region) {
+ DMERR("Recovering region mismatch: (%Lu/%Lu)",
+ lr->u.lr_region, lc->recovering_region);
+ BUG();
+ }
+ /*
+ * Clear the recovery
+ */
+ lc->recovering_region = (uint64_t)-1;
+ list_del(&ru->ru_list);
+ mempool_free(ru, region_user_pool);
+ } else { /* ru->ru_rw == RU_WRITE */
+ /*
+ * Mirror has placed the region into RH_NOSYNC
+ * It is safe to pull the ru
+ */
+ list_del(&ru->ru_list);
+ mempool_free(ru, region_user_pool);
+ }
/* gone again: lc->sync_count--;*/
log_clear_bit(lc, lc->sync_bits, lr->u.lr_region);
}
next reply other threads:[~2007-04-05 21:33 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-05 21:33 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26 3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10 7:13 jbrassow
2007-04-10 7:12 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14 4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070405213337.30913.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.