From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 26 Apr 2007 16:54:51 -0000 [thread overview]
Message-ID: <20070426165451.24620.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2007-04-26 17:54:49
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
Log message:
Bug 238031: cluster mirrors not handling all recovery/write conflicts
The problem is that the kernel (main mirror code) does not do any marks/clears
when writing to a region before that region has been recovered. So, it is not
possible for the server to detect a conflict. Basically, we must turn the
'is_remote_recovering' function back on and disallow any writes to regions that
are OR WILL BE recovering.
This change is really going to cause some pain during writes while mirrors are
re-syncing. The better fix for the future is to have the writes always
mark/clear the regions - then we can remove the 'is_remote_recovering' function
again.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.46&r2=1.1.2.47
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.34&r2=1.1.2.35
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/24 20:08:57 1.1.2.46
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/26 16:54:49 1.1.2.47
@@ -861,11 +861,10 @@
int rtn;
struct log_c *lc = (struct log_c *) log->context;
-/* take out optimization
if(atomic_read(&lc->in_sync) == 1){
return 0;
}
-*/
+
rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
return rtn;
}
@@ -876,11 +875,11 @@
struct log_c *lc = (struct log_c *) log->context;
/* check known_regions, return if found */
-/* take out optimization
+
if(atomic_read(&lc->in_sync) == 1){
return 1;
}
-*/
+
if(!block){
return -EWOULDBLOCK;
}
@@ -1414,7 +1413,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
@@ -1436,7 +1435,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/24 20:08:57 1.1.2.34
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/26 16:54:49 1.1.2.35
@@ -494,12 +494,32 @@
static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
{
+ region_t region;
struct region_user *ru;
- if ((ru = find_ru_by_region(lc, lr->u.lr_region)) &&
- (ru->ru_rw == RU_RECOVER))
+ /*
+ * This gets a bit complicated. I wish we didn't have to use this
+ * function, but because the mirror code doesn't mark regions which
+ * it writes to that are out-of-sync, we need this function.
+ *
+ * Problem is, we don't know how long the user is going to take to
+ * write to the region after they have called this function. So,
+ * we are forced at this point to deny any writes to regions we
+ * are recovering or might recover in the future.
+ *
+ * We can count on the client side to not send us one of these
+ * requests if the mirror is known to be in-sync.
+ *
+ * And yes, it sucks to take this much time off the I/O.
+ */
+ region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count, 0);
+
+ if (lr->u.lr_region >= region) {
+ DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
+ lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
lr->u.lr_int_rtn = 1;
- else
+ } else
lr->u.lr_int_rtn = 0;
return 0;
@@ -639,24 +659,65 @@
static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
{
struct region_user *new;
+ int sync_search, conflict = 0;
+ region_t *region = &(lr->u.lr_region_rtn);
- new = mempool_alloc(region_user_pool, GFP_NOFS);
- if(!new){
- lr->u.lr_int_rtn = 0;
- return -ENOMEM;
+ lr->u.lr_int_rtn = 0; /* Default to no work */
+
+ if (lc->recovering_region != (uint64_t)-1) {
+ DMDEBUG("Someone is already recovering region %Lu/%s",
+ lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
+ return 0;
}
-
- if ((lr->u.lr_int_rtn = _core_get_resync_work(lc, &(lr->u.lr_region_rtn)))){
- new->ru_nodeid = who;
- new->ru_region = lr->u.lr_region_rtn;
- new->ru_rw = RU_RECOVER;
- list_add(&new->ru_list, &lc->region_users);
- DMDEBUG("Assigning recovery work to %u/%s: %Lu",
- who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
- } else {
- mempool_free(new, region_user_pool);
+
+ if (lc->sync_search >= lc->region_count) {
+ /*
+ * FIXME: pvmove is not supported yet, but when it is,
+ * an audit of sync_count changes will need to be made
+ */
+ if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
+ lc->sync_search = 0;
+ lc->sync_pass++;
+ } else {
+ return 0;
+ }
+ }
+
+ for (sync_search = lc->sync_search;
+ sync_search < lc->region_count;
+ sync_search = (*region + 1)) {
+ *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count,
+ sync_search);
+ if (find_ru_by_region(lc, *region)) {
+ conflict = 1;
+ DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
+ *region, lc->uuid + (strlen(lc->uuid) - 8));
+ } else {
+ break;
+ }
}
+ if (*region >= lc->region_count)
+ return 0;
+
+ new = mempool_alloc(region_user_pool, GFP_NOFS);
+ if (!new)
+ return -ENOMEM;
+
+ if (!conflict)
+ lc->sync_search = *region + 1;
+
+ lc->recovering_region = *region;
+
+ lr->u.lr_int_rtn = 1; /* Assigning work */
+ new->ru_nodeid = who;
+ new->ru_region = *region;
+ new->ru_rw = RU_RECOVER;
+ list_add(&new->ru_list, &lc->region_users);
+ DMDEBUG("Assigning recovery work to %u/%s: %Lu",
+ who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
+
return 0;
}
next reply other threads:[~2007-04-26 16:54 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-26 16:54 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26 3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10 7:13 jbrassow
2007-04-10 7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14 4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070426165451.24620.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.