[Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...

All of lore.kernel.org
 help / color / mirror / Atom feed

From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 11 Jul 2007 16:18:04 -0000	[thread overview]
Message-ID: <20070711161804.22419.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	jbrassow at sourceware.org	2007-07-11 16:18:03

Modified files:
	cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c 
	                    dm-cmirror-xfr.h 

Log message:
	Bug 238629: dm-cmirror: Remote recovery conflict...
	
	The kernel changes are now in place (marking/clearing the log
	during writes to nosync regions) to allow nominal I/O to
	region that have yet to be recovered.
	
	Also moved around some debugging messages and removed
	'is_remote_recovering()'.  (is_remote_recovering is obviated by
	the new mechanism for handling recovery/write ordering.)

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.48&r2=1.1.2.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.35&r2=1.1.2.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6

--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/05/09 21:44:34	1.1.2.48
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/07/11 16:18:03	1.1.2.49
@@ -286,8 +286,8 @@
 		lc->server_id = lr.u.lr_coordinator;
 	} else {
 		/* ATTENTION -- what do we do with this ? */
-		DMWARN("Failed to receive election results from server: (%s,%d)",
-		       lc->uuid + (strlen(lc->uuid) - 8), len);
+		DMWARN("Failed to receive election results from server: (%s/%d,%d)",
+		       lc->uuid + (strlen(lc->uuid) - 8), lc->uuid_ref, len);
 		error = len;
 	}
 
@@ -601,6 +601,7 @@
 	INIT_LIST_HEAD(&lc->mark_logged);
 	spin_lock_init(&lc->state_lock);
 
+	atomic_set(&lc->suspended, 1);
 	lc->server_valid = 0;
 	lc->server_id = 0xDEAD;
 
@@ -853,15 +854,7 @@
 
 static int cluster_is_remote_recovering(struct dirty_log *log, region_t region)
 {
-	int rtn;
-	struct log_c *lc = (struct log_c *) log->context;
-
-	if(atomic_read(&lc->in_sync) == 1){
-		return 0;
-	}
-
-	rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
-	return rtn;
+	return 0;
 }
 
 static int cluster_in_sync(struct dirty_log *log, region_t region, int block)
@@ -977,57 +970,43 @@
 
 	spin_lock(&lc->state_lock);
 
-
 	/*
-	 * It is possible for the following in the mirror code:
-	 *  0) Mark is already logged for a region
-	 *  1) rh_dec, sets region state to RH_CLEAN (asynchronous)
-	 *  2) rh_update_states (DOESN'T FLUSH!!!, bug #235040)
-	 *  3) do_writes, trys to mark region
-	 *
-	 * The following shouldn't have to be handled b/c of the flush
-	 *  0) Region finishes recovery
-	 *  1) rh_update_states clears region (DOES FLUSH)
-	 *  2) do_writes, trys to mark region
-	 *
-	 * This can lead to this next case being valid.
+	 * An item on the clear_waiting list should have been flushed
+	 * before getting this mark_region call.
 	 */
-	list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
+	list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list)
 		if (region == rs->rs_region) {
-			if (!rs->rs_mark_logged) {
-				DMERR("Moving region(%Lu/%s) from clear_waiting -> mark_waiting",
-				      region, lc->uuid + (strlen(lc->uuid) - 8));
-			}
-			list_del_init(&rs->rs_list);
-			list_add(&rs->rs_list,
-				 (rs->rs_mark_logged) ?
-				 &lc->mark_logged : &lc->mark_waiting);
-			goto out;
+			DMERR("Region being marked found on clear_waiting list (%Lu/%s)",
+			      region, lc->uuid + (strlen(lc->uuid) - 8));
+			BUG();
 		}
-	}
+
+	/*
+	 * We should never get two marks before a flush, unless the
+	 * region is not in-sync.  One valid scenario would be:
+	 *  0) region not in-sync
+	 *  1) rh_inc (mark region)
+	 *  2) rh_update_states
+	 *  3) rh_dec (dec pending and put on clean_region list)
+	 *  4) do_writes -> rh_inc (second mark)
+	 */
+	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list)
+		if (region == rs->rs_region)
+			goto out;
 
 	/*
 	 * It is possible for the following in the mirror code:
 	 *  0) Mark is already logged for a region
-	 *  1) rh_update_states
+	 *  1) rh_update_states (were the clear_region happens)
 	 *  2) rh_dec, sets region state to RH_CLEAN (asynchronous)
 	 *  3) do_writes, trys to mark region
 	 *
 	 * This can lead to this next case being valid.
 	 */
-	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list){
-		if (region == rs->rs_region) {
+	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list)
+		if (region == rs->rs_region)
 			goto out;
-		}
-	}
 
-	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
-		if (region == rs->rs_region) {
-			DMERR("Mark already waiting (%Lu/%s)",
-			      region, lc->uuid + (strlen(lc->uuid) - 8));
-			BUG();
-		}
-	}
 	spin_unlock(&lc->state_lock);
 
 	rs_new = mempool_alloc(region_state_pool, GFP_NOFS);
@@ -1074,14 +1053,13 @@
 	 * 6) we recover the region
 	 * 7) clearing the region after recovery causes us to get here
 	 *
-	 * Once 235040 is cleared, any entries found in this list should
-	 * cause a bug.
+	 * Bug 235040 cleared.  This should no longer happen.
 	 */
 	list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
 		if(region == rs->rs_region){
-			DMERR("%d) Double clear on region ("
-			      SECTOR_FORMAT ")", __LINE__, region);
-			goto out;
+			DMERR("Double clear on region (%Lu/%s)",
+			      region, lc->uuid + (strlen(lc->uuid) - 8));
+			BUG();
 		}
 	}
 
@@ -1140,7 +1118,7 @@
 		list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
 			if (*region == rs->rs_region) {
 				DMERR("WARNING: Bug 235039/235040 detected!");
-				DMERR("Work-around in place.");
+				BUG();
 			}
 		}
 	}
@@ -1204,16 +1182,6 @@
 
 	switch(status){
 	case STATUSTYPE_INFO:
-		DMDEBUG("LOG INFO:");
-		DMDEBUG("  uuid: %s", lc->uuid);
-		DMDEBUG("  uuid_ref    : %d", lc->uuid_ref);
-		DMDEBUG(" ?region_count: %Lu", lc->region_count);
-		DMDEBUG(" ?sync_count  : %Lu", lc->sync_count);
-		DMDEBUG(" ?sync_search : %d", lc->sync_search);
-		DMDEBUG("  in_sync     : %s", (atomic_read(&lc->in_sync)) ? "YES" : "NO");
-		DMDEBUG("  server_id   : %u", lc->server_id);
-		DMDEBUG("  server_valid: %s",
-			((lc->server_id != 0xDEAD) && lc->server_valid) ? "YES" : "NO");
 		if(lc->sync != DEFAULTSYNC)
 			arg_count++;
 
@@ -1228,6 +1196,42 @@
                 break;
 
         case STATUSTYPE_TABLE:
+		DMDEBUG("LOG INFO:");
+		DMDEBUG("  uuid: %s", lc->uuid);
+		DMDEBUG("  uuid_ref    : %d", lc->uuid_ref);
+		DMDEBUG(" ?region_count: %Lu", lc->region_count);
+		DMDEBUG(" ?sync_count  : %Lu", lc->sync_count);
+		DMDEBUG(" ?sync_search : %d", lc->sync_search);
+		DMDEBUG("  in_sync     : %s", (atomic_read(&lc->in_sync)) ? "YES" : "NO");
+		DMDEBUG("  suspended   : %s", (atomic_read(&lc->suspended)) ? "YES" : "NO");
+		DMDEBUG("  server_id   : %u", lc->server_id);
+		DMDEBUG("  server_valid: %s",
+			((lc->server_id != 0xDEAD) && lc->server_valid) ? "YES" : "NO");
+		{
+			struct log_c *tmp_lc;
+
+			down(&log_list_lock);
+			list_for_each_entry(tmp_lc, &log_list_head, log_list){
+				if(!strncmp(tmp_lc->uuid, lc->uuid, MAX_NAME_LEN) &&
+				   (tmp_lc->uuid_ref != lc->uuid_ref)){
+					DMDEBUG("LOG INFO [COPY FOUND]:");
+					DMDEBUG("  uuid: %s", tmp_lc->uuid);
+					DMDEBUG("  uuid_ref    : %d", tmp_lc->uuid_ref);
+					DMDEBUG(" ?region_count: %Lu", tmp_lc->region_count);
+					DMDEBUG(" ?sync_count  : %Lu", tmp_lc->sync_count);
+					DMDEBUG(" ?sync_search : %d", tmp_lc->sync_search);
+					DMDEBUG("  in_sync     : %s",
+						(atomic_read(&tmp_lc->in_sync)) ? "YES" : "NO");
+					DMDEBUG("  suspended   : %s",
+						(atomic_read(&tmp_lc->suspended)) ? "YES" : "NO");
+					DMDEBUG("  server_id   : %u", tmp_lc->server_id);
+					DMDEBUG("  server_valid: %s",
+						((tmp_lc->server_id != 0xDEAD) &&
+						 tmp_lc->server_valid) ? "YES" : "NO");
+				}
+			}
+			up(&log_list_lock);
+		}
 		if(lc->sync != DEFAULTSYNC)
 			arg_count++;
 
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/04/26 16:54:49	1.1.2.35
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/07/11 16:18:03	1.1.2.36
@@ -221,54 +221,6 @@
 	return count;
 }
 
-struct region_user *find_ru_by_region(struct log_c *lc, region_t region);
-static int _core_get_resync_work(struct log_c *lc, region_t *region)
-{
-	int sync_search, conflict = 0;
-
-	if (lc->recovering_region != (uint64_t)-1) {
-		DMDEBUG("Someone is already recovering region %Lu/%s",
-			lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
-		return 0;
-	}
-
-	if (lc->sync_search >= lc->region_count) {
-		/*
-		 * FIXME: pvmove is not supported yet, but when it is,
-		 * an audit of sync_count changes will need to be made
-		 */
-		if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
-			lc->sync_search = 0;
-			lc->sync_pass++;
-		} else {
-			return 0;
-		}
-	}
-	for (sync_search = lc->sync_search;
-	     sync_search < lc->region_count;
-	     sync_search = (*region + 1)) {
-		*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
-						  lc->region_count,
-						  sync_search);
-		if (find_ru_by_region(lc, *region)) {
-			conflict = 1;
-			DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
-				*region, lc->uuid + (strlen(lc->uuid) - 8));
-		} else {
-			break;
-		}
-	}
-	if (!conflict)
-		lc->sync_search = *region + 1;
-
-	if (*region >= lc->region_count)
-		return 0;
-
-	lc->recovering_region = *region;
-	return 1;
-}
-
-
 static int print_zero_bits(unsigned char *str, int offset, int bit_count){
 	int i,j;
 	int count=0;
@@ -492,39 +444,6 @@
 	return 0;
 }
 
-static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
-{
-	region_t region;
-	struct region_user *ru;
-
-	/*
-	 * This gets a bit complicated.  I wish we didn't have to use this
-	 * function, but because the mirror code doesn't mark regions which
-	 * it writes to that are out-of-sync, we need this function.
-	 *
-	 * Problem is, we don't know how long the user is going to take to
-	 * write to the region after they have called this function.  So,
-	 * we are forced at this point to deny any writes to regions we
-	 * are recovering or might recover in the future.
-	 *
-	 * We can count on the client side to not send us one of these
-	 * requests if the mirror is known to be in-sync.
-	 *
-	 * And yes, it sucks to take this much time off the I/O.
-	 */
-	region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
-					 lc->region_count, 0);
-
-	if (lr->u.lr_region >= region) {
-		DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
-			lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
-		lr->u.lr_int_rtn = 1;
-	} else
-		lr->u.lr_int_rtn = 0;
-
-	return 0;
-}
-
 static int server_in_sync(struct log_c *lc, struct log_request *lr)
 {
 	if (lr->u.lr_region > lc->region_count) {
@@ -555,7 +474,13 @@
 	new->ru_region = lr->u.lr_region;
 	new->ru_rw = RU_WRITE;
 
-	if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
+	if (find_ru(lc, who, lr->u.lr_region)) {
+		DMWARN("Attempt to mark a already marked region (%u,"
+		       SECTOR_FORMAT
+		       "/%s)",
+		       who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+		mempool_free(new, region_user_pool);
+	} else if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
 		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
 		list_add(&new->ru_list, &lc->region_users);
 	} else if (ru->ru_rw == RU_RECOVER) {
@@ -572,6 +497,22 @@
 		DMDEBUG("Mark requester   : %u", who);
 		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
 		list_add_tail(&new->ru_list, &lc->region_users);
+	} else {
+		list_add(&new->ru_list, &ru->ru_list);
+	}
+
+	/*
+	if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
+		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
+		list_add(&new->ru_list, &lc->region_users);
+	} else if (ru->ru_rw == RU_RECOVER) {
+		DMDEBUG("Attempt to mark a region " SECTOR_FORMAT
+		      "/%s which is being recovered.",
+		       lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+		DMDEBUG("Current recoverer: %u", ru->ru_nodeid);
+		DMDEBUG("Mark requester   : %u", who);
+		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
+		list_add_tail(&new->ru_list, &lc->region_users);
 	} else if (!find_ru(lc, who, lr->u.lr_region)) {
 		list_add(&new->ru_list, &ru->ru_list);
 	} else {
@@ -581,6 +522,7 @@
 		       who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
 		mempool_free(new, region_user_pool);
 	}
+	*/
 
 	return 0;
 }
@@ -611,35 +553,28 @@
 {
 	int r = 0;
 	int count = 0;
-	struct region_user *ru, *ru2;
+	struct region_user *ru, *marker = NULL, *recoverer = NULL;
 
 	if (lc->recovering_region != (uint64_t)-1) {
 		list_for_each_entry(ru, &lc->region_users, ru_list)
-			if (ru->ru_region == lc->recovering_region)
-				count++;
-
-		if (count > 1) {
-			list_for_each_entry(ru, &lc->region_users, ru_list)
+			if (ru->ru_region == lc->recovering_region) {
 				if (ru->ru_rw == RU_RECOVER)
-					break;
+					recoverer = ru;
+				else
+					marker = ru;
+				count++;
+			}
 
-			DMDEBUG("Flush includes region which is being recovered (%u/%Lu).  Delaying...",
-				ru->ru_nodeid, ru->ru_region);
-			DMDEBUG("Recovering region: %Lu", lc->recovering_region);
+		if (marker && recoverer) {
+			DMDEBUG("Flush/recovery collision (%Lu/%s): count = %d, marker = %u, recoverer = %u",
+				marker->ru_region, lc->uuid + (strlen(lc->uuid) - 8),
+				count, marker->ru_nodeid, recoverer->ru_nodeid);
+			/*
 			DMDEBUG("  sync_bit: %s, clean_bit: %s",
 				log_test_bit(lc->sync_bits, lc->recovering_region) ? "set" : "unset",
 				log_test_bit(lc->clean_bits, lc->recovering_region) ? "set" : "unset");
+			*/
 
-			list_for_each_entry(ru2, &lc->region_users, ru_list)
-				if (ru->ru_region == ru2->ru_region)
-					DMDEBUG("  %s", (ru2->ru_rw == RU_RECOVER) ? "recover" :
-						(ru2->ru_rw == RU_WRITE) ? "writer" : "unknown");
-
-			/* FIXME: work-around for bug 235040 */
-			DMDEBUG("Revoking resync work");
-			lc->recovering_region = (uint64_t)-1;
-			list_del(&ru->ru_list);
-			mempool_free(ru, region_user_pool);
 			return -EBUSY;
 		}
 	}
@@ -1121,9 +1056,6 @@
 		case LRT_IS_CLEAN:
 			error = server_is_clean(lc, lr);
 			break;
-		case LRT_IS_REMOTE_RECOVERING:
-			error = server_is_remote_recovering(lc, lr);
-			break;
 		case LRT_IN_SYNC:
 			error = server_in_sync(lc, lr);
 			break;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/04/24 20:08:57	1.1.2.5
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/07/11 16:18:03	1.1.2.6
@@ -10,19 +10,18 @@
 #define MAX_NAME_LEN 128
 
 #define LRT_IS_CLEAN			1
-#define LRT_IS_REMOTE_RECOVERING	2
-#define LRT_IN_SYNC             	3
-#define LRT_MARK_REGION         	4
-#define LRT_CLEAR_REGION        	5
-#define LRT_FLUSH                       6
-#define LRT_GET_RESYNC_WORK     	7
-#define LRT_COMPLETE_RESYNC_WORK        8
-#define LRT_GET_SYNC_COUNT      	9
-
-#define LRT_ELECTION			10
-#define LRT_SELECTION			11
-#define LRT_MASTER_ASSIGN		12
-#define LRT_MASTER_LEAVING		13
+#define LRT_IN_SYNC             	2
+#define LRT_MARK_REGION         	3
+#define LRT_CLEAR_REGION        	4
+#define LRT_FLUSH                       5
+#define LRT_GET_RESYNC_WORK     	6
+#define LRT_COMPLETE_RESYNC_WORK        7
+#define LRT_GET_SYNC_COUNT      	8
+
+#define LRT_ELECTION			9
+#define LRT_SELECTION			10
+#define LRT_MASTER_ASSIGN		11
+#define LRT_MASTER_LEAVING		12
 
 #define CLUSTER_LOG_PORT 51005

next             reply	other threads:[~2007-07-11 16:18 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-11 16:18 jbrassow [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26  3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-04-26 16:55 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10  7:13 jbrassow
2007-04-10  7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14  4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070711161804.22419.qmail@sourceware.org \
    --to=jbrassow@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.