cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 21 Sep 2007 20:07:38 -0000	[thread overview]
Message-ID: <20070921200738.26579.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	jbrassow at sourceware.org	2007-09-21 20:07:37

Modified files:
	cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c 
	                    dm-cmirror-xfr.h 

Log message:
	Bug 291521: Cluster mirror can become out-of-sync if nominal I/O overla...
	
	It is insufficient to simply delay flush requests that have marks
	pending to a recovering region.  Although a collision between nominal
	I/O and resync I/O can be avoided this way, the state of the region
	changes from RH_NOSYNC to RH_CLEAN in the mean time.  The machine
	being delayed will think the region is still in the RH_NOSYNC state
	and only write to the primary device... leaving the other mirror
	devices out-of-sync.
	
	We must delay writes to remotely recovering regions before the state
	of the region is determined and cached in the region caching code...
	The entry point for this already exists in 'is_remote_recovering'.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.51&r2=1.1.2.52
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.37&r2=1.1.2.38
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7

--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/09/13 15:24:20	1.1.2.51
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/09/21 20:07:37	1.1.2.52
@@ -858,7 +858,15 @@
 
 static int cluster_is_remote_recovering(struct dirty_log *log, region_t region)
 {
-	return 0;
+	int rtn;
+	struct log_c *lc = (struct log_c *) log->context;
+ 	 
+	if (atomic_read(&lc->in_sync) == 1) {
+		return 0;
+	}
+
+	rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
+	return rtn;
 }
 
 static int cluster_in_sync(struct dirty_log *log, region_t region, int block)
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/09/13 15:24:20	1.1.2.37
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/09/21 20:07:37	1.1.2.38
@@ -444,6 +444,24 @@
 	return 0;
 }
 
+static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
+{
+	uint64_t high, low;
+
+	high = lc->sync_search + 10;
+	low = (lc->recovering_region != (uint64_t)-1) ?
+		lc->recovering_region :
+		lc->sync_search;
+	if ((lr->u.lr_region >= low) && (lr->u.lr_region <= high)) {
+		DMDEBUG("Remote recovery conflict: %Lu/%s",
+			lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+		lr->u.lr_int_rtn = 1;
+	} else
+		lr->u.lr_int_rtn = 0;
+
+	return 0;
+}
+
 static int server_in_sync(struct log_c *lc, struct log_request *lr)
 {
 	if (lr->u.lr_region > lc->region_count) {
@@ -485,51 +503,28 @@
 		list_add(&new->ru_list, &lc->region_users);
 	} else if (ru->ru_rw == RU_RECOVER) {
 		/*
-		 * The flush will block if a write conflicts with a
-		 * recovering region.  In the meantime, we add this
-		 * entry to the tail of the list so the recovery
-		 * gets cleared first.
-		 */
-		DMDEBUG("Attempt to mark a region " SECTOR_FORMAT
-		      "/%s which is being recovered.",
-		       lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
-		DMDEBUG("Current recoverer: %u", ru->ru_nodeid);
-		DMDEBUG("Mark requester   : %u", who);
-		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
-		list_add_tail(&new->ru_list, &lc->region_users);
+		 * A mark that happens to a region in recovery
+		 * means certain corruption.
+		 */
+		DMERR("Mark attempted to recovering region by %u: %Lu/%s",
+		      who, lr->u.lr_region,
+		      lc->uuid + (strlen(lc->uuid) - 8));
+		DMERR("  lc->recovering_region = %Lu", lc->recovering_region);
+		DMERR("  ru->ru_rw             = %d", ru->ru_rw);
+		DMERR("  ru->ru_nodeid         = %u", ru->ru_nodeid);
+		DMERR("  ru->ru_region         = %Lu", ru->ru_region);
+		BUG();
 	} else {
 		list_add(&new->ru_list, &ru->ru_list);
 	}
 
-	/*
-	if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
-		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
-		list_add(&new->ru_list, &lc->region_users);
-	} else if (ru->ru_rw == RU_RECOVER) {
-		DMDEBUG("Attempt to mark a region " SECTOR_FORMAT
-		      "/%s which is being recovered.",
-		       lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
-		DMDEBUG("Current recoverer: %u", ru->ru_nodeid);
-		DMDEBUG("Mark requester   : %u", who);
-		log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
-		list_add_tail(&new->ru_list, &lc->region_users);
-	} else if (!find_ru(lc, who, lr->u.lr_region)) {
-		list_add(&new->ru_list, &ru->ru_list);
-	} else {
-		DMWARN("Attempt to mark a already marked region (%u,"
-		       SECTOR_FORMAT
-		       "/%s)",
-		       who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
-		mempool_free(new, region_user_pool);
-	}
-	*/
-
 	return 0;
 }
 
 
 static int server_clear_region(struct log_c *lc, struct log_request *lr, uint32_t who)
 {
+	int check_bug = 0;
 	struct region_user *ru;
 
 	ru = find_ru(lc, who, lr->u.lr_region);
@@ -538,13 +533,22 @@
 		       who, lr->u.lr_region);
 		return -EINVAL;
 	} else {
+		if (lc->recovering_region == lr->u.lr_region) {
+			lc->recovering_region = (uint64_t)-1;
+			check_bug = 1;
+		}
 		list_del(&ru->ru_list);
 		mempool_free(ru, region_user_pool);
 	}
 
-	if(!find_ru_by_region(lc, lr->u.lr_region)){
+	if (!find_ru_by_region(lc, lr->u.lr_region)) {
 		log_set_bit(lc, lc->clean_bits, lr->u.lr_region);
+	} else if (check_bug) {
+		DMERR("Multiple marks exist on a region being recovered: %Lu/%s",
+		      lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+		BUG();
 	}
+		
 	return 0;
 }
 
@@ -552,37 +556,17 @@
 static int server_flush(struct log_c *lc, uint32_t who)
 {
 	int r = 0;
-	int count = 0;
-	int do_flush = 1;
-	struct region_user *ru, *marker = NULL, *recoverer = NULL;
+	struct region_user *ru;
 
 	if (lc->recovering_region != (uint64_t)-1) {
-		list_for_each_entry(ru, &lc->region_users, ru_list)
-			if (ru->ru_region == lc->recovering_region) {
-				if (ru->ru_rw == RU_RECOVER)
-					recoverer = ru;
-				else if (ru->ru_nodeid == who) {
-					do_flush = 0;
-					marker = ru;
-				} else
-					marker = ru;
-
-				count++;
-			}
-
-		if (marker && recoverer) {
-			DMDEBUG("Flush/recovery collision on %Lu/%s: count = %d, marker = %u, recoverer = %u",
-				marker->ru_region, lc->uuid + (strlen(lc->uuid) - 8),
-				count, marker->ru_nodeid, recoverer->ru_nodeid);
-			DMDEBUG("  Count     = %d", count);
-			DMDEBUG("  Marker    = %u", marker->ru_nodeid);
-			DMDEBUG("  Recoverer = %u", recoverer->ru_nodeid);
-			DMDEBUG("  Flusher   = %u", who);
-			if (!do_flush) {
-				DMDEBUG("Blocking flush");
-				return -EBUSY;
-			} else
-				DMDEBUG("Allowing flush");
+		list_for_each_entry(ru, &lc->region_users, ru_list) {
+			if ((ru->ru_region == lc->recovering_region) &&
+			    (ru->ru_rw != RU_RECOVER)) {
+				DMERR("Flush attempted to recovering region by %u: %Lu/%s",
+				      who, lc->recovering_region,
+				      lc->uuid + (strlen(lc->uuid) - 8));
+				BUG();
+			}
 		}
 	}
 
@@ -601,7 +585,6 @@
 static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
 {
 	struct region_user *new;
-	int sync_search, conflict = 0;
 	region_t *region = &(lr->u.lr_region_rtn);
 
 	lr->u.lr_int_rtn = 0; /* Default to no work */
@@ -625,19 +608,13 @@
 		}
 	}
 
-	for (sync_search = lc->sync_search;
-	     sync_search < lc->region_count;
-	     sync_search = (*region + 1)) {
-		*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
-						  lc->region_count,
-						  sync_search);
-		if (find_ru_by_region(lc, *region)) {
-			conflict = 1;
-			DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
-				*region, lc->uuid + (strlen(lc->uuid) - 8));
-		} else {
-			break;
-		}
+	*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+					  lc->region_count,
+					  lc->sync_search);
+	if (find_ru_by_region(lc, *region)) {
+		DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
+			*region, lc->uuid + (strlen(lc->uuid) - 8));
+		return 0;
 	}
 
 	if (*region >= lc->region_count)
@@ -647,8 +624,7 @@
 	if (!new)
 		return -ENOMEM;
 
-	if (!conflict)
-		lc->sync_search = *region + 1;
+	lc->sync_search = *region + 1;
 
 	lc->recovering_region = *region;
 
@@ -678,13 +654,18 @@
 			return -EINVAL;
 		}
 
-		lc->recovering_region = (uint64_t)-1;
-
 		/* We could receive multiple identical request due to network failure */
-		if(!log_test_bit(lc->sync_bits, lr->u.lr_region)) {
+		if (!log_test_bit(lc->sync_bits, lr->u.lr_region)) {
 			log_set_bit(lc, lc->sync_bits, lr->u.lr_region);
 			lc->sync_count++;
 		}
+
+		/*
+		 * We will: 
+		 * lc->recovering_region = (uint64_t)-1;
+		 * in clear_region so we can do extra validation
+		 */
+
 		lc->sync_pass = 0;
 
 		DMDEBUG("Resync work completed by %u: %Lu/%s",
@@ -1064,6 +1045,9 @@
 		case LRT_IS_CLEAN:
 			error = server_is_clean(lc, lr);
 			break;
+		case LRT_IS_REMOTE_RECOVERING:
+			error = server_is_remote_recovering(lc, lr);
+			break;
 		case LRT_IN_SYNC:
 			error = server_in_sync(lc, lr);
 			break;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/07/11 16:18:03	1.1.2.6
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/09/21 20:07:37	1.1.2.7
@@ -10,18 +10,19 @@
 #define MAX_NAME_LEN 128
 
 #define LRT_IS_CLEAN			1
-#define LRT_IN_SYNC             	2
-#define LRT_MARK_REGION         	3
-#define LRT_CLEAR_REGION        	4
-#define LRT_FLUSH                       5
-#define LRT_GET_RESYNC_WORK     	6
-#define LRT_COMPLETE_RESYNC_WORK        7
-#define LRT_GET_SYNC_COUNT      	8
-
-#define LRT_ELECTION			9
-#define LRT_SELECTION			10
-#define LRT_MASTER_ASSIGN		11
-#define LRT_MASTER_LEAVING		12
+#define LRT_IS_REMOTE_RECOVERING	2
+#define LRT_IN_SYNC             	3
+#define LRT_MARK_REGION         	4
+#define LRT_CLEAR_REGION        	5
+#define LRT_FLUSH                       6
+#define LRT_GET_RESYNC_WORK     	7
+#define LRT_COMPLETE_RESYNC_WORK        8
+#define LRT_GET_SYNC_COUNT      	9
+
+#define LRT_ELECTION			10
+#define LRT_SELECTION			11
+#define LRT_MASTER_ASSIGN		12
+#define LRT_MASTER_LEAVING		13
 
 #define CLUSTER_LOG_PORT 51005
 



             reply	other threads:[~2007-09-21 20:07 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-21 20:07 jbrassow [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26  3:15 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10  7:13 jbrassow
2007-04-10  7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:34 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14  4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070921200738.26579.qmail@sourceware.org \
    --to=jbrassow@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).