From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 26 Apr 2007 16:54:51 -0000 [thread overview]
Message-ID: <20070426165451.24620.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2007-04-26 17:54:49
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
Log message:
Bug 238031: cluster mirrors not handling all recovery/write conflicts
The problem is that the kernel (main mirror code) does not perform any
mark/clear operations when writing to a region before that region has been
recovered. As a result, it is not possible for the server to detect a
conflict. Basically, we must turn the 'is_remote_recovering' function back on
and disallow any writes to regions that are recovering OR WILL BE recovering.
This will cause some real pain for writes while mirrors are re-syncing.
The better fix for the future is to have writes always mark/clear the
regions - then we can once again remove the 'is_remote_recovering' function.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.46&r2=1.1.2.47
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.34&r2=1.1.2.35
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/24 20:08:57 1.1.2.46
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/26 16:54:49 1.1.2.47
@@ -861,11 +861,10 @@
int rtn;
struct log_c *lc = (struct log_c *) log->context;
-/* take out optimization
if(atomic_read(&lc->in_sync) == 1){
return 0;
}
-*/
+
rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
return rtn;
}
@@ -876,11 +875,11 @@
struct log_c *lc = (struct log_c *) log->context;
/* check known_regions, return if found */
-/* take out optimization
+
if(atomic_read(&lc->in_sync) == 1){
return 1;
}
-*/
+
if(!block){
return -EWOULDBLOCK;
}
@@ -1414,7 +1413,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
@@ -1436,7 +1435,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/24 20:08:57 1.1.2.34
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/26 16:54:49 1.1.2.35
@@ -494,12 +494,32 @@
static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
{
+ region_t region;
struct region_user *ru;
- if ((ru = find_ru_by_region(lc, lr->u.lr_region)) &&
- (ru->ru_rw == RU_RECOVER))
+ /*
+ * This gets a bit complicated. I wish we didn't have to use this
+ * function, but because the mirror code doesn't mark regions which
+ * it writes to that are out-of-sync, we need this function.
+ *
+ * Problem is, we don't know how long the user is going to take to
+ * write to the region after they have called this function. So,
+ * we are forced at this point to deny any writes to regions we
+ * are recovering or might recover in the future.
+ *
+ * We can count on the client side to not send us one of these
+ * requests if the mirror is known to be in-sync.
+ *
+ * And yes, it sucks to take this much time off the I/O.
+ */
+ region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count, 0);
+
+ if (lr->u.lr_region >= region) {
+ DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
+ lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
lr->u.lr_int_rtn = 1;
- else
+ } else
lr->u.lr_int_rtn = 0;
return 0;
@@ -639,24 +659,65 @@
static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
{
struct region_user *new;
+ int sync_search, conflict = 0;
+ region_t *region = &(lr->u.lr_region_rtn);
- new = mempool_alloc(region_user_pool, GFP_NOFS);
- if(!new){
- lr->u.lr_int_rtn = 0;
- return -ENOMEM;
+ lr->u.lr_int_rtn = 0; /* Default to no work */
+
+ if (lc->recovering_region != (uint64_t)-1) {
+ DMDEBUG("Someone is already recovering region %Lu/%s",
+ lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
+ return 0;
}
-
- if ((lr->u.lr_int_rtn = _core_get_resync_work(lc, &(lr->u.lr_region_rtn)))){
- new->ru_nodeid = who;
- new->ru_region = lr->u.lr_region_rtn;
- new->ru_rw = RU_RECOVER;
- list_add(&new->ru_list, &lc->region_users);
- DMDEBUG("Assigning recovery work to %u/%s: %Lu",
- who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
- } else {
- mempool_free(new, region_user_pool);
+
+ if (lc->sync_search >= lc->region_count) {
+ /*
+ * FIXME: pvmove is not supported yet, but when it is,
+ * an audit of sync_count changes will need to be made
+ */
+ if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
+ lc->sync_search = 0;
+ lc->sync_pass++;
+ } else {
+ return 0;
+ }
+ }
+
+ for (sync_search = lc->sync_search;
+ sync_search < lc->region_count;
+ sync_search = (*region + 1)) {
+ *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count,
+ sync_search);
+ if (find_ru_by_region(lc, *region)) {
+ conflict = 1;
+ DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
+ *region, lc->uuid + (strlen(lc->uuid) - 8));
+ } else {
+ break;
+ }
}
+ if (*region >= lc->region_count)
+ return 0;
+
+ new = mempool_alloc(region_user_pool, GFP_NOFS);
+ if (!new)
+ return -ENOMEM;
+
+ if (!conflict)
+ lc->sync_search = *region + 1;
+
+ lc->recovering_region = *region;
+
+ lr->u.lr_int_rtn = 1; /* Assigning work */
+ new->ru_nodeid = who;
+ new->ru_region = *region;
+ new->ru_rw = RU_RECOVER;
+ list_add(&new->ru_list, &lc->region_users);
+ DMDEBUG("Assigning recovery work to %u/%s: %Lu",
+ who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
+
return 0;
}
next reply other threads:[~2007-04-26 16:54 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-26 16:54 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26 3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10 7:13 jbrassow
2007-04-10 7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14 4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070426165451.24620.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).