From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 13 Sep 2007 15:24:21 -0000 [thread overview]
Message-ID: <20070913152421.1883.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2007-09-13 15:24:20
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
Log message:
Bug 257881: Flush/recovery collision leads to deadlock after leg ...
The procedure for coordinating nominal I/O and recovery I/O was to
either:
1) delay a flush which contained a mark to a region being recovered
2) skip over regions that are currently marked when assigning recovery
This bug has to do with the way #1 was implemented.
The following scenario would trigger it:
1) node1 is assigned recovery on region X
2) node1 also does a mark (write) on region Y
3) node2 attempts to mark region X
**) any flush issued here will delay waiting for recovery to complete on X
4) node1 needs to perform the flush before it can get on with completing
recovery - but it can't flush, so everyone is delayed *forever*.
The fix was to allow flushes from nodes that are not attempting to mark
regions that are being recovered. In the example above, node1 should be
allowed to complete the flush because it is not trying to write to the
same region that is being recovered. node2 would be correctly delayed.
Since node1 can complete the flush, it can also complete the recovery -
thus allowing things to proceed.
This bug only affects mirrors that are not in-sync and are doing I/O.
This bug can occur whether there are device/machine failures or not.
This bug is most easily reproduced with a number of mirrors, but would
be possible with just one.
I've also fixed up some debugging output so it is more consistent and
easier to follow the flow of events.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.50&r2=1.1.2.51
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.36&r2=1.1.2.37
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/08/23 16:51:39 1.1.2.50
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/09/13 15:24:20 1.1.2.51
@@ -946,7 +946,8 @@
while ((r = consult_server(lc, 0, LRT_FLUSH, NULL))) {
if (r == -EBUSY) {
- DMDEBUG("Delaying flush due to recovery");
+ DMDEBUG("Delaying flush due to recovery (%s)",
+ lc->uuid + (strlen(lc->uuid) - 8));
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
continue;
@@ -1110,8 +1111,8 @@
}
if (rtn)
- DMDEBUG("Client received resync work: %Lu/%s",
- *region, lc->uuid + (strlen(lc->uuid) - 8));
+ DMDEBUG("Received recovery work from %u: %Lu/%s",
+ lc->server_id, *region, lc->uuid + (strlen(lc->uuid) - 8));
/*
* Check for bug 235039
@@ -1137,12 +1138,19 @@
region_t success_tmp = success;
struct log_c *lc = (struct log_c *) log->context;
+ if (success)
+ DMDEBUG("Client finishing recovery: %Lu/%s",
+ region, lc->uuid + (strlen(lc->uuid) - 8));
+ else
+ DMDEBUG("Notifying server(%u) of sync change: %Lu/%s",
+ lc->server_id, region,
+ lc->uuid + (strlen(lc->uuid) - 8));
for (i = 0; i < 5; i++) {
if (!consult_server(lc, region,
LRT_COMPLETE_RESYNC_WORK, &success_tmp))
break;
success_tmp = success;
- DMWARN("Unable to notify server of completed resync work");
+ DMWARN("Unable to notify server of sync state change");
}
return;
}
@@ -1203,6 +1211,7 @@
DMDEBUG("LOG INFO:");
DMDEBUG(" uuid: %s", lc->uuid);
DMDEBUG(" uuid_ref : %d", lc->uuid_ref);
+ DMDEBUG(" log type : %s", (lc->log_dev) ? "disk" : "core");
DMDEBUG(" ?region_count: %Lu", lc->region_count);
DMDEBUG(" ?sync_count : %Lu", lc->sync_count);
DMDEBUG(" ?sync_search : %d", lc->sync_search);
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/07/11 16:18:03 1.1.2.36
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/09/13 15:24:20 1.1.2.37
@@ -549,10 +549,11 @@
}
-static int server_flush(struct log_c *lc)
+static int server_flush(struct log_c *lc, uint32_t who)
{
int r = 0;
int count = 0;
+ int do_flush = 1;
struct region_user *ru, *marker = NULL, *recoverer = NULL;
if (lc->recovering_region != (uint64_t)-1) {
@@ -560,22 +561,28 @@
if (ru->ru_region == lc->recovering_region) {
if (ru->ru_rw == RU_RECOVER)
recoverer = ru;
- else
+ else if (ru->ru_nodeid == who) {
+ do_flush = 0;
marker = ru;
+ } else
+ marker = ru;
+
count++;
}
if (marker && recoverer) {
- DMDEBUG("Flush/recovery collision (%Lu/%s): count = %d, marker = %u, recoverer = %u",
+ DMDEBUG("Flush/recovery collision on %Lu/%s: count = %d, marker = %u, recoverer = %u",
marker->ru_region, lc->uuid + (strlen(lc->uuid) - 8),
count, marker->ru_nodeid, recoverer->ru_nodeid);
- /*
- DMDEBUG(" sync_bit: %s, clean_bit: %s",
- log_test_bit(lc->sync_bits, lc->recovering_region) ? "set" : "unset",
- log_test_bit(lc->clean_bits, lc->recovering_region) ? "set" : "unset");
- */
-
- return -EBUSY;
+ DMDEBUG(" Count = %d", count);
+ DMDEBUG(" Marker = %u", marker->ru_nodeid);
+ DMDEBUG(" Recoverer = %u", recoverer->ru_nodeid);
+ DMDEBUG(" Flusher = %u", who);
+ if (!do_flush) {
+ DMDEBUG("Blocking flush");
+ return -EBUSY;
+ } else
+ DMDEBUG("Allowing flush");
}
}
@@ -650,8 +657,8 @@
new->ru_region = *region;
new->ru_rw = RU_RECOVER;
list_add(&new->ru_list, &lc->region_users);
- DMDEBUG("Assigning recovery work to %u/%s: %Lu",
- who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
+ DMDEBUG("Assigning recovery work to %u: %Lu/%s",
+ who, new->ru_region, lc->uuid + (strlen(lc->uuid) - 8));
return 0;
}
@@ -680,7 +687,8 @@
}
lc->sync_pass = 0;
- DMDEBUG("Resync work completed: %Lu", lr->u.lr_region);
+ DMDEBUG("Resync work completed by %u: %Lu/%s",
+ who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
return 0;
}
@@ -1077,7 +1085,12 @@
error = server_clear_region(lc, lr, nodeid);
break;
case LRT_FLUSH:
- error = server_flush(lc);
+ if(!(nodeid =
+ ipaddr_to_nodeid((struct sockaddr *)msg.msg_name))){
+ error = -ENXIO;
+ break;
+ }
+ error = server_flush(lc, nodeid);
break;
case LRT_GET_RESYNC_WORK:
if(!(nodeid =
next reply other threads:[~2007-09-13 15:24 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-09-13 15:24 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26 3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10 7:13 jbrassow
2007-04-10 7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14 4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070913152421.1883.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).