From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 26 Apr 2007 16:54:51 -0000 [thread overview]
Message-ID: <20070426165451.24620.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2007-04-26 17:54:49
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
Log message:
Bug 238031: cluster mirrors not handling all recovery/write conflicts
The problem is that the kernel (main mirror code) does not perform any
mark/clear operations when writing to a region before that region has been
recovered. As a result, it is not possible for the server to detect a
conflict. Basically, we must turn the 'is_remote_recovering' function back on
and disallow any writes to regions that are recovering OR WILL BE recovering.
This will cause some real pain for writes while mirrors are re-syncing.
The better fix for the future is to have writes always mark/clear the
regions - then we can once again remove the 'is_remote_recovering' function.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.46&r2=1.1.2.47
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.34&r2=1.1.2.35
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/24 20:08:57 1.1.2.46
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/04/26 16:54:49 1.1.2.47
@@ -861,11 +861,10 @@
int rtn;
struct log_c *lc = (struct log_c *) log->context;
-/* take out optimization
if(atomic_read(&lc->in_sync) == 1){
return 0;
}
-*/
+
rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
return rtn;
}
@@ -876,11 +875,11 @@
struct log_c *lc = (struct log_c *) log->context;
/* check known_regions, return if found */
-/* take out optimization
+
if(atomic_read(&lc->in_sync) == 1){
return 1;
}
-*/
+
if(!block){
return -EWOULDBLOCK;
}
@@ -1414,7 +1413,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
@@ -1436,7 +1435,7 @@
.resume = cluster_resume,
.get_region_size = cluster_get_region_size,
.is_clean = cluster_is_clean,
-/* .is_remote_recovering = cluster_is_remote_recovering,*/
+ .is_remote_recovering = cluster_is_remote_recovering,
.in_sync = cluster_in_sync,
.flush = cluster_flush,
.mark_region = cluster_mark_region,
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/24 20:08:57 1.1.2.34
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/26 16:54:49 1.1.2.35
@@ -494,12 +494,32 @@
static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
{
+ region_t region;
struct region_user *ru;
- if ((ru = find_ru_by_region(lc, lr->u.lr_region)) &&
- (ru->ru_rw == RU_RECOVER))
+ /*
+ * This gets a bit complicated. I wish we didn't have to use this
+ * function, but because the mirror code doesn't mark regions which
+ * it writes to that are out-of-sync, we need this function.
+ *
+ * Problem is, we don't know how long the user is going to take to
+ * write to the region after they have called this function. So,
+ * we are forced at this point to deny any writes to regions we
+ * are recovering or might recover in the future.
+ *
+ * We can count on the client side to not send us one of these
+ * requests if the mirror is known to be in-sync.
+ *
+ * And yes, it sucks to take this much time off the I/O.
+ */
+ region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count, 0);
+
+ if (lr->u.lr_region >= region) {
+ DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
+ lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
lr->u.lr_int_rtn = 1;
- else
+ } else
lr->u.lr_int_rtn = 0;
return 0;
@@ -639,24 +659,65 @@
static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
{
struct region_user *new;
+ int sync_search, conflict = 0;
+ region_t *region = &(lr->u.lr_region_rtn);
- new = mempool_alloc(region_user_pool, GFP_NOFS);
- if(!new){
- lr->u.lr_int_rtn = 0;
- return -ENOMEM;
+ lr->u.lr_int_rtn = 0; /* Default to no work */
+
+ if (lc->recovering_region != (uint64_t)-1) {
+ DMDEBUG("Someone is already recovering region %Lu/%s",
+ lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
+ return 0;
}
-
- if ((lr->u.lr_int_rtn = _core_get_resync_work(lc, &(lr->u.lr_region_rtn)))){
- new->ru_nodeid = who;
- new->ru_region = lr->u.lr_region_rtn;
- new->ru_rw = RU_RECOVER;
- list_add(&new->ru_list, &lc->region_users);
- DMDEBUG("Assigning recovery work to %u/%s: %Lu",
- who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
- } else {
- mempool_free(new, region_user_pool);
+
+ if (lc->sync_search >= lc->region_count) {
+ /*
+ * FIXME: pvmove is not supported yet, but when it is,
+ * an audit of sync_count changes will need to be made
+ */
+ if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
+ lc->sync_search = 0;
+ lc->sync_pass++;
+ } else {
+ return 0;
+ }
+ }
+
+ for (sync_search = lc->sync_search;
+ sync_search < lc->region_count;
+ sync_search = (*region + 1)) {
+ *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count,
+ sync_search);
+ if (find_ru_by_region(lc, *region)) {
+ conflict = 1;
+ DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
+ *region, lc->uuid + (strlen(lc->uuid) - 8));
+ } else {
+ break;
+ }
}
+ if (*region >= lc->region_count)
+ return 0;
+
+ new = mempool_alloc(region_user_pool, GFP_NOFS);
+ if (!new)
+ return -ENOMEM;
+
+ if (!conflict)
+ lc->sync_search = *region + 1;
+
+ lc->recovering_region = *region;
+
+ lr->u.lr_int_rtn = 1; /* Assigning work */
+ new->ru_nodeid = who;
+ new->ru_region = *region;
+ new->ru_rw = RU_RECOVER;
+ list_add(&new->ru_list, &lc->region_users);
+ DMDEBUG("Assigning recovery work to %u/%s: %Lu",
+ who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
+
return 0;
}
next reply other threads:[~2007-04-26 16:54 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-26 16:54 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26 3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10 7:13 jbrassow
2007-04-10 7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14 4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070426165451.24620.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).