All of lore.kernel.org
 help / color / mirror / Atom feed
From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 26 Apr 2007 16:55:53 -0000	[thread overview]
Message-ID: <20070426165553.25025.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL45
Changes by:	jbrassow at sourceware.org	2007-04-26 17:55:51

Modified files:
	cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c 

Log message:
	Bug 238031: cluster mirrors not handling all recovery/write conflicts
	
	Problem is that the kernel (main mirror code) does not do any marks/clears when
	writing to a region before its recovery.  So, it is not possible for the server
	to detect a conflict.  Basically, we must turn back on the
	'is_remote_recovering' function and disallow any writes to regions that are OR
	WILL BE recovering.
	
	It's really going to cause some pain during writes while mirrors are re-syncing.
	The better fix for the future is to have the writes always mark/clear the
	regions - then we can again remove the 'is_remote_recovering' function.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.41.2.5&r2=1.1.2.41.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26.2.7&r2=1.1.2.26.2.8

--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/04/24 20:10:20	1.1.2.41.2.5
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/04/26 16:55:51	1.1.2.41.2.6
@@ -861,11 +861,10 @@
 	int rtn;
 	struct log_c *lc = (struct log_c *) log->context;
 
-/* take out optimization
 	if(atomic_read(&lc->in_sync) == 1){
 		return 0;
 	}
-*/
+
 	rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
 	return rtn;
 }
@@ -876,11 +875,11 @@
 	struct log_c *lc = (struct log_c *) log->context;
   
 	/* check known_regions, return if found */
-/* take out optimization
+
 	if(atomic_read(&lc->in_sync) == 1){
 		return 1;
 	}
-*/
+
 	if(!block){
 		return -EWOULDBLOCK;
 	}
@@ -1414,7 +1413,7 @@
 	.resume = cluster_resume,
 	.get_region_size = cluster_get_region_size,
 	.is_clean = cluster_is_clean,
-/*	.is_remote_recovering = cluster_is_remote_recovering,*/
+	.is_remote_recovering = cluster_is_remote_recovering,
 	.in_sync = cluster_in_sync,
 	.flush = cluster_flush,
 	.mark_region = cluster_mark_region,
@@ -1436,7 +1435,7 @@
 	.resume = cluster_resume,
 	.get_region_size = cluster_get_region_size,
 	.is_clean = cluster_is_clean,
-/*	.is_remote_recovering = cluster_is_remote_recovering,*/
+	.is_remote_recovering = cluster_is_remote_recovering,
 	.in_sync = cluster_in_sync,
 	.flush = cluster_flush,
 	.mark_region = cluster_mark_region,
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/04/24 20:10:20	1.1.2.26.2.7
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/04/26 16:55:51	1.1.2.26.2.8
@@ -494,12 +494,32 @@
 
 static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
 {
+	region_t region;
 	struct region_user *ru;
 
-	if ((ru = find_ru_by_region(lc, lr->u.lr_region)) && 
-	    (ru->ru_rw == RU_RECOVER))
+	/*
+	 * This gets a bit complicated.  I wish we didn't have to use this
+	 * function, but because the mirror code doesn't mark regions which
+	 * it writes to that are out-of-sync, we need this function.
+	 *
+	 * Problem is, we don't know how long the user is going to take to
+	 * write to the region after they have called this function.  So,
+	 * we are forced at this point to deny any writes to regions we
+	 * are recovering or might recover in the future.
+	 *
+	 * We can count on the client side to not send us one of these
+	 * requests if the mirror is known to be in-sync.
+	 *
+	 * And yes, it sucks to take this much time off the I/O.
+	 */
+	region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+					 lc->region_count, 0);
+
+	if (lr->u.lr_region >= region) {
+		DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
+			lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
 		lr->u.lr_int_rtn = 1;
-	else
+	} else
 		lr->u.lr_int_rtn = 0;
 
 	return 0;
@@ -639,24 +659,65 @@
 static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
 {
 	struct region_user *new;
+	int sync_search, conflict = 0;
+	region_t *region = &(lr->u.lr_region_rtn);
 
-	new = mempool_alloc(region_user_pool, GFP_NOFS);
-	if(!new){
-		lr->u.lr_int_rtn = 0;
-		return -ENOMEM;
+	lr->u.lr_int_rtn = 0; /* Default to no work */
+
+	if (lc->recovering_region != (uint64_t)-1) {
+		DMDEBUG("Someone is already recovering region %Lu/%s",
+			lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
+		return 0;
 	}
-	
-	if ((lr->u.lr_int_rtn = _core_get_resync_work(lc, &(lr->u.lr_region_rtn)))){
-		new->ru_nodeid = who;
-		new->ru_region = lr->u.lr_region_rtn;
-		new->ru_rw = RU_RECOVER;
-		list_add(&new->ru_list, &lc->region_users);
-		DMDEBUG("Assigning recovery work to %u/%s: %Lu",
-			who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
-	} else {
-		mempool_free(new, region_user_pool);
+
+	if (lc->sync_search >= lc->region_count) {
+		/*
+		 * FIXME: pvmove is not supported yet, but when it is,
+		 * an audit of sync_count changes will need to be made
+		 */
+		if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
+			lc->sync_search = 0;
+			lc->sync_pass++;
+		} else {
+			return 0;
+		}
+	}
+
+	for (sync_search = lc->sync_search;
+	     sync_search < lc->region_count;
+	     sync_search = (*region + 1)) {
+		*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+						  lc->region_count,
+						  sync_search);
+		if (find_ru_by_region(lc, *region)) {
+			conflict = 1;
+			DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
+				*region, lc->uuid + (strlen(lc->uuid) - 8));
+		} else {
+			break;
+		}
 	}
 
+	if (*region >= lc->region_count)
+		return 0;
+
+	new = mempool_alloc(region_user_pool, GFP_NOFS);
+	if (!new)
+		return -ENOMEM;
+
+	if (!conflict)
+		lc->sync_search = *region + 1;
+
+	lc->recovering_region = *region;
+
+	lr->u.lr_int_rtn = 1; /* Assigning work */
+	new->ru_nodeid = who;
+	new->ru_region = *region;
+	new->ru_rw = RU_RECOVER;
+	list_add(&new->ru_list, &lc->region_users);
+	DMDEBUG("Assigning recovery work to %u/%s: %Lu",
+		who, lc->uuid + (strlen(lc->uuid) - 8), new->ru_region);
+
 	return 0;
 }
 



             reply	other threads:[~2007-04-26 16:55 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-26 16:55 jbrassow [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26  3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10  7:13 jbrassow
2007-04-10  7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14  4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070426165553.25025.qmail@sourceware.org \
    --to=jbrassow@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.