cluster-devel.redhat.com archive mirror
From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 5 Apr 2007 21:33:37 -0000
Message-ID: <20070405213337.30913.qmail@sourceware.org>

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL45
Changes by:	jbrassow at sourceware.org	2007-04-05 22:33:36

Modified files:
	cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c 

Log message:
	Bug 234918: NMI Watchdog detected LOCKUP while running proces...
	Bug 217438: scrolling kernel requests to mark mirror regions
	
	Item 1:
	I needed to check for marked regions when getting resync work, not
	just check for resyncing regions when a mark/flush happens.
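	
	(Illustrative sketch only, not part of this commit: the gist is that
	the resync-work path must skip any region a writer currently has
	marked.  region_is_marked() is a hypothetical helper; struct log_c,
	struct region_state and the mark_logged/mark_waiting lists are the
	ones used in dm-cmirror-client.c, and <linux/list.h> is assumed.)
	
	/* Returns non-zero if a writer currently has the region marked.
	 * Caller is expected to hold lc->state_lock. */
	static int region_is_marked(struct log_c *lc, region_t region)
	{
		struct region_state *rs;
	
		list_for_each_entry(rs, &lc->mark_logged, rs_list)
			if (rs->rs_region == region)
				return 1;
		list_for_each_entry(rs, &lc->mark_waiting, rs_list)
			if (rs->rs_region == region)
				return 1;
	
		/* Not marked -- safe to hand out as resync work */
		return 0;
	}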
	
	Item 2:
	There is a corner case that allows two calls to clear the same
	region.  The second does not need to be logged.
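	
	(Also a sketch, not the applied hunk -- the full change is in the
	dm-cmirror-client.c patch below.  The guard amounts to noticing that
	the region is already queued on lc->clear_waiting and dropping the
	second request after reporting it, instead of calling BUG().
	clear_already_queued() is a hypothetical helper name.)
	
	/* Returns non-zero if this is a redundant, second clear request. */
	static int clear_already_queued(struct log_c *lc, region_t region)
	{
		struct region_state *rs;
	
		list_for_each_entry(rs, &lc->clear_waiting, rs_list) {
			if (region == rs->rs_region) {
				DMERR("Double clear on region (" SECTOR_FORMAT ")",
				      region);
				return 1;	/* already queued; don't log it again */
			}
		}
		return 0;
	}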

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.41.2.2&r2=1.1.2.41.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26.2.3&r2=1.1.2.26.2.4

--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/04/03 18:23:01	1.1.2.41.2.2
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/04/05 21:33:36	1.1.2.41.2.3
@@ -1034,7 +1034,9 @@
 
 	spin_lock(&lc->state_lock);
 
-	/* Should find match in this list, or no lists at all */
+	/*
+	 * The nominal case is to find the region in the marked list
+	 */
 	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list){
 		if(region == rs->rs_region){
 			list_del_init(&rs->rs_list);
@@ -1043,28 +1045,46 @@
 		}
 	}
 
-
-	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
+	/*
+	 * It is possible, but unlikely, to get to this case. It requires
+	 * the following to happen:
+	 * 1) mark the region for writing
+	 * 2) clear the region
+	 * 3) clear doesn't get flushed because of bug 235040
+	 * 4) suspend due to server relocation
+	 * 5) on-disk log says we need to recover (because it hasn't been cleared)
+	 * 6) we recover the region
+	 * 7) clearing the region after recovery causes us to get here
+	 *
+	 * Once bug 235040 is fixed, any entries found in this list should
+	 * be treated as a bug.
+	 */
+	list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
 		if(region == rs->rs_region){
-			DMERR("Clear pre-empting mark (%Lu/%s)",
-			       region, lc->uuid + (strlen(lc->uuid) - 8));
-			BUG();
+			DMERR("%d) Double clear on region ("
+			      SECTOR_FORMAT ")", __LINE__, region);
+			goto out;
 		}
 	}
 
-	list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
+	list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
 		if(region == rs->rs_region){
-			DMERR("%d) Double clear on region ("
-			      SECTOR_FORMAT ")", __LINE__, region);
+			DMERR("Clear pre-empting mark (%Lu/%s)",
+			       region, lc->uuid + (strlen(lc->uuid) - 8));
 			BUG();
 		}
 	}
+	
 	/* We can get here because we may be doing resync_work, and therefore,**
 	** clearing without ever marking..................................... */
 
 	/* Don't need to spin_unlock, because allocation is non-blocking */
 	rs_new = mempool_alloc(region_state_pool, GFP_ATOMIC);
-	BUG_ON(!rs_new);
+	if (!rs_new) {
+		DMERR("Failed to allocate space for clear region request: %Lu",
+		      region);
+		BUG();
+	}
 	memset(rs_new, 0, sizeof(struct region_state));
 
 	rs_new->rs_region = region;
@@ -1088,6 +1108,21 @@
 		DMWARN("Error while getting resync work: bad region");
 		rtn = 0;
 	}
+
+	/*
+	 * Check for bug 235039
+	 * Note the changes in cluster_clear_region
+	 */
+	if (rtn == 1) {
+		struct region_state *rs, *tmp_rs;
+		list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
+			if (*region == rs->rs_region) {
+				DMERR("WARNING: Bug 235039/235040 detected!");
+				DMERR("Work-around in place.");
+			}
+		}
+	}
+
 	return rtn;
 }
 
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/04/04 21:36:01	1.1.2.26.2.3
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/04/05 21:33:36	1.1.2.26.2.4
@@ -656,6 +656,8 @@
 
 
 static int server_complete_resync_work(struct log_c *lc, struct log_request *lr, int success){
+	struct region_user *ru;
+
 	if (lr->u.lr_region > lc->region_count) {
 		return -EINVAL;
 	}
@@ -678,6 +680,42 @@
 
 		DMDEBUG("Resync work completed: %Lu", lr->u.lr_region);
 	} else if (log_test_bit(lc->sync_bits, lr->u.lr_region)) {
+		ru = find_ru_by_region(lc, lr->u.lr_region);
+
+		/*
+		 * The following condition can never happen unless we have
+		 * a corrupted list or a communication error.
+		 *
+		 * If a write failed to one of the mirror devices, the ru
+		 * should be RU_WRITE.  If a recovery failed, it should be
+		 * RU_RECOVER
+		 */
+		if (!ru) {
+			DMERR("Unable to find region being marked out-of-sync: %Lu",
+			      lr->u.lr_region);
+			return -EINVAL;
+		}
+
+		if (ru->ru_rw == RU_RECOVER) {
+			if (lr->u.lr_region != lc->recovering_region) {
+				DMERR("Recovering region mismatch: (%Lu/%Lu)",
+				      lr->u.lr_region, lc->recovering_region);
+				BUG();
+			}
+			/*
+			 * Clear the recovery
+			 */
+			lc->recovering_region = (uint64_t)-1;
+			list_del(&ru->ru_list);
+			mempool_free(ru, region_user_pool);
+		} else {  /* ru->ru_rw == RU_WRITE */
+			/*
+			 * Mirror has placed the region into RH_NOSYNC.
+			 * It is safe to pull the ru.
+			 */
+			list_del(&ru->ru_list);
+			mempool_free(ru, region_user_pool);
+		}
 		/* gone again: lc->sync_count--;*/
 		log_clear_bit(lc, lc->sync_bits, lr->u.lr_region);
 	}
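
(For readers skimming the dm-cmirror-server.c hunk above: the new code
boils down to the decision sketched below, taken when a resync attempt
fails on a region the on-disk log still shows as in sync.  This is a
compressed, illustrative sketch, not the applied patch; fail_region() is
a hypothetical name, while find_ru_by_region(), RU_RECOVER/RU_WRITE,
region_user_pool and log_clear_bit() are the ones used in
dm-cmirror-server.c.)

	/* Drop whichever region_user held the failed region, then mark the
	 * region out-of-sync in the log. */
	static int fail_region(struct log_c *lc, uint64_t region)
	{
		struct region_user *ru = find_ru_by_region(lc, region);
	
		if (!ru)	/* corrupted list or communication error */
			return -EINVAL;
	
		if (ru->ru_rw == RU_RECOVER)
			lc->recovering_region = (uint64_t)-1;	/* recovery is over */
	
		/* Safe for both RU_RECOVER and RU_WRITE users */
		list_del(&ru->ru_list);
		mempool_free(ru, region_user_pool);
	
		log_clear_bit(lc, lc->sync_bits, region);
		return 0;
	}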


