cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 22 Mar 2007 22:34:46 -0000	[thread overview]
Message-ID: <20070322223446.14408.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL45
Changes by:	jbrassow at sourceware.org	2007-03-22 22:34:44

Modified files:
	cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c 
	                    dm-cmirror-xfr.h 

Log message:
	Bug 233034: cmirror server failure/migration during GFS I/O causes metad...
	
	Add sequence number to messages to ensure
	that cmirror clients get the response they expect.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.41&r2=1.1.2.41.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26&r2=1.1.2.26.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.2&r2=1.1.2.2.2.1

--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/03/14 04:28:32	1.1.2.41
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c	2007/03/22 22:34:44	1.1.2.41.2.1
@@ -53,12 +53,6 @@
 /* These vars are just for stats, and will be removed */
 static uint32_t request_count=0;
 static uint32_t request_retry_count=0;
-static int clear_req=0;
-static int mark_req=0;
-static int insync_req=0;
-static int clear_req2ser=0;
-static int mark_req2ser=0;
-static int insync_req2ser=0;
 
 static void *region_state_alloc(int gfp_mask, void *pool_data){
 	return kmalloc(sizeof(struct region_state), gfp_mask);
@@ -316,6 +310,7 @@
 
 static int _consult_server(struct log_c *lc, region_t region,
 			  int type, region_t *result, int *retry){
+	static int seq = 0;
 	int len;
 	int error=0;
 	struct sockaddr_in saddr_in;
@@ -336,6 +331,7 @@
 	memset(lr, 0, sizeof(struct log_request));
 	
 	lr->lr_type = type;
+	lr->lr_seq = seq;
 	if(type == LRT_MASTER_LEAVING){
 		lr->u.lr_starter = my_id;
 	} else {
@@ -369,18 +365,6 @@
 	iov.iov_len = sizeof(struct log_request);
 	iov.iov_base = lr;
 
-	if(lr->lr_type == LRT_MARK_REGION){
-		mark_req2ser++;
-	}
-
-	if(lr->lr_type == LRT_CLEAR_REGION){
-		clear_req2ser++;
-	}
-	
-	if(lr->lr_type == LRT_IN_SYNC){
-		insync_req2ser++;
-	}
-	
 	fs = get_fs();
 	set_fs(get_ds());
   
@@ -394,6 +378,7 @@
 		goto fail;
 	}
 
+rerecv:
 	iov.iov_len = sizeof(struct log_request);
 	iov.iov_base = lr;
 
@@ -414,9 +399,44 @@
 		DMWARN("Error while listening for server response: %d", len);
 		error = len;
 		*retry = 1;
+		seq++;
 		goto fail;
 	}
     
+	if (seq != lr->lr_seq) {
+		DMERR("Message sequence number mismatch: %d/%d",
+		      seq, lr->lr_seq);
+		if (seq > lr->lr_seq) {
+			DMERR(" Skipping.  Listening again for response to %s",
+			      RQ_STRING(type));
+			memset(lr, 0, sizeof(struct log_request));
+			goto rerecv;
+		}
+		DMERR(" Must try to resend request, %s", RQ_STRING(type));
+		error = -EBADE;
+		*retry = 1;
+		seq++;
+		goto fail;
+	}
+	seq++;
+
+	if (type != lr->lr_type) {
+		DMERR("Got incorrect message type back: %s/%s",
+		      RQ_STRING(type), RQ_STRING(lr->lr_type));
+		error = -EBADE;
+		*retry = 1;
+		goto fail;
+	}
+
+	if (memcmp(lc->uuid, lr->lr_uuid, MAX_NAME_LEN)) {
+		DMERR("Got reply from server for wrong log:");
+		DMERR(" Expected UUID: %s", lc->uuid);
+		DMERR(" Recieved UUID: %s", lr->lr_uuid);
+		error = -EBADE;
+		*retry = 1;
+		goto fail;
+	}
+
 	if(lr->u.lr_int_rtn == -EAGAIN){
 		DMWARN("Server (%u), request type %d, -EAGAIN."
 		       "  Mirror suspended?",
@@ -453,17 +473,7 @@
 			DMDEBUG(" - log uuid:: %s (%s)",
 			       lc->uuid + (strlen(lc->uuid) - 8),
 			       atomic_read(&lc->suspended) ? "suspended" : "active");
-			DMDEBUG(" - request :: %s",
-			       (type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
-			       (type == LRT_IN_SYNC)? "LRT_IN_SYNC":
-			       (type == LRT_MARK_REGION)? "LRT_MARK_REGION":
-			       (type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
-			       (type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
-			       (type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
-			       (type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
-			       (type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
-			       (type == LRT_ELECTION)? "LRT_ELECTION":
-			       (type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN");
+			DMDEBUG(" - request :: %s", RQ_STRING(type));
 			DMDEBUG(" - error   :: %d", error);
 			DMINFO("Too many retries, attempting to re-establish server connection.");
 			lc->server_id = 0xDEAD;
@@ -519,7 +529,7 @@
 			}
 			clear_region_count -= i;
 			DMINFO(" - %d clear region requests wiped", i);
-
+			i=0;
 			DMINFO(" - Resending all mark region requests");
 			list_for_each_entry(rs, &marked_region_list, rs_list){
 				/* Resend only those associated with referenced log */
@@ -527,7 +537,7 @@
 					continue;
 				do {
 					retry = 0;
-					DMINFO("   - " SECTOR_FORMAT, rs->rs_region);
+					i++;
 					rtn = _consult_server(rs->rs_lc, rs->rs_region,
 							      LRT_MARK_REGION, NULL, &retry);
 					if (lc->server_id == 0xDEAD) {
@@ -536,6 +546,7 @@
 					}
 				} while(retry);
 			}
+			DMINFO(" - %d mark region requests resent", i);
 			DMINFO("Clean-up complete");
 			if(type == LRT_MARK_REGION){
 				/* we just handled all marks */
@@ -544,17 +555,7 @@
 				goto out;
 			} else {
 				DMINFO("Continuing request type, %d (%s)", type,
-				      (type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
-				      (type == LRT_IN_SYNC)? "LRT_IN_SYNC":
-				      (type == LRT_MARK_REGION)? "LRT_MARK_REGION":
-				      (type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
-				      (type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
-				      (type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
-				      (type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
-				      (type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
-				      (type == LRT_ELECTION)? "LRT_ELECTION":
-				      (type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN"
-					);
+				       RQ_STRING(type));
 			}
 			new_server = 0;
 		}
@@ -886,7 +887,6 @@
 	struct log_c *lc = (struct log_c *) log->context;
   
 	/* check known_regions, return if found */
-	insync_req++;
 /* take out optimization
 	if(atomic_read(&lc->in_sync) == 1){
 		return 1;
@@ -915,8 +915,6 @@
 	struct region_state *rs, *tmp_rs, *rs_new;
 	struct log_c *lc = (struct log_c *) log->context;
 
-	mark_req++;
-
 	rs_new = mempool_alloc(region_state_pool, GFP_KERNEL);
 
 	memset(rs_new, 0, sizeof(struct region_state));
@@ -924,8 +922,10 @@
 	spin_lock(&region_state_lock);
 	list_for_each_entry_safe(rs, tmp_rs, &clear_region_list, rs_list){
 		if(lc == rs->rs_lc && region == rs->rs_region){
+			/*
 			DMDEBUG("Mark pre-empting clear (%Lu/%s)",
 				region, lc->uuid + (strlen(lc->uuid) - 8));
+			*/
 			list_del_init(&rs->rs_list);
 			list_add(&rs->rs_list, &marked_region_list);
 			clear_region_count--;
@@ -1007,7 +1007,6 @@
 {
 	struct log_c *lc = (struct log_c *) log->context;
 	struct region_state *rs, *tmp_rs, *rs_new;
-	clear_req++;
 
 	rs_new = mempool_alloc(region_state_pool, GFP_ATOMIC);
 
@@ -1140,21 +1139,6 @@
 		DMINFO("  Regions marked   : %d", j);
 		DMINFO("  Regions clearing : %d", i);
 
-		DMINFO("  Mark requests    : %d", mark_req);
-		if(mark_req)
-			DMINFO("  Mark req to serv : %d (%d%%)", mark_req2ser,
-			       (mark_req2ser*100)/mark_req);
-
-		DMINFO("  Clear requests   : %d", clear_req);
-		if(clear_req)
-			DMINFO("  Clear req to serv: %d (%d%%)", clear_req2ser,
-			       (clear_req2ser*100)/clear_req);
-
-		DMINFO("  Sync  requests   : %d", insync_req);
-		if(insync_req)
-			DMINFO("  Sync req to serv : %d (%d%%)", insync_req2ser,
-			       (insync_req2ser*100)/insync_req);
-
 		if(lc->server_id == my_id){
 			print_server_status(lc);
 		}
@@ -1216,9 +1200,11 @@
 		atomic_set(&lc->in_sync, 0);
 	}
 	spin_unlock(&log_list_lock);
-	
+
+	/*
 	if (likely(!shutting_down))
 		suspend_server();
+	*/
 
 	return 0;
 }
@@ -1267,7 +1253,9 @@
 		BUG();
 		break;
 	}
+	/*
 	resume_server();
+	*/
 	return 0;
 }
 
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/03/14 04:28:32	1.1.2.26
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c	2007/03/22 22:34:44	1.1.2.26.2.1
@@ -911,17 +911,7 @@
 			DMDEBUG("Getting request while server (%u) is suspended:", my_id);
 			DMDEBUG(" - Requester :: %u", nodeid);
 			DMDEBUG(" - log uuid  :: %s", lc->uuid + (strlen(lc->uuid) - 8));
-			DMDEBUG(" - req type  :: %s",
-				(lr.lr_type == LRT_IS_CLEAN)? "LRT_IS_C	LEAN":
-				(lr.lr_type == LRT_IN_SYNC)? "LRT_IN_SYNC":
-				(lr.lr_type == LRT_MARK_REGION)? "LRT_MARK_REGION":
-				(lr.lr_type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
-				(lr.lr_type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
-				(lr.lr_type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
-				(lr.lr_type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
-				(lr.lr_type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
-				(lr.lr_type == LRT_ELECTION)? "LRT_ELECTION":
-				(lr.lr_type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN");
+			DMDEBUG(" - req type  :: %s", RQ_STRING(lr.lr_type));
 			*/
 			if (my_id != nodeid) {
 				lr.u.lr_int_rtn = -ENXIO;
@@ -981,17 +971,7 @@
 		if(error){
 /*
 			DMWARN("Error (%d) while processing request (%s)",
-			       error,
-			       (lr.lr_type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
-			       (lr.lr_type == LRT_IN_SYNC)? "LRT_IN_SYNC":
-			       (lr.lr_type == LRT_MARK_REGION)? "LRT_MARK_REGION":
-			       (lr.lr_type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
-			       (lr.lr_type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
-			       (lr.lr_type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
-			       (lr.lr_type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
-			       (lr.lr_type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
-			       (lr.lr_type == LRT_ELECTION)? "LRT_ELECTION":
-			       (lr.lr_type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN");
+			       error, RQ_STRING(lr.lr_type));
 */
 			lr.u.lr_int_rtn = error;
 		}
@@ -1011,17 +991,7 @@
 		set_fs(fs);
 		if(error < 0){
 			DMWARN("unable to sendmsg to client (type = %s, error = %d)",
-			       (lr.lr_type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
-			       (lr.lr_type == LRT_IN_SYNC)? "LRT_IN_SYNC":
-			       (lr.lr_type == LRT_MARK_REGION)? "LRT_MARK_REGION":
-			       (lr.lr_type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
-			       (lr.lr_type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
-			       (lr.lr_type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
-			       (lr.lr_type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
-			       (lr.lr_type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
-			       (lr.lr_type == LRT_ELECTION)? "LRT_ELECTION":
-			       (lr.lr_type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN",
-			       error);
+			       RQ_STRING(lr.lr_type), error);
 			return error;
 		}
 	} else if(error == -EAGAIN || error == -ETIMEDOUT){
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/02/14 17:44:07	1.1.2.2
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h	2007/03/22 22:34:44	1.1.2.2.2.1
@@ -25,8 +25,21 @@
 
 #define CLUSTER_LOG_PORT 51005
 
+#define RQ_STRING(x) /* printable name of LRT_* request type x, else "UNKNOWN"; x is evaluated repeatedly */ \
+	(((x) == LRT_IS_CLEAN) ? "LRT_IS_CLEAN": \
+	((x) == LRT_IN_SYNC) ? "LRT_IN_SYNC": \
+	((x) == LRT_MARK_REGION) ? "LRT_MARK_REGION": \
+	((x) == LRT_GET_RESYNC_WORK) ? "LRT_GET_RESYNC_WORK": \
+	((x) == LRT_GET_SYNC_COUNT) ? "LRT_GET_SYNC_COUNT": \
+	((x) == LRT_CLEAR_REGION) ? "LRT_CLEAR_REGION": \
+	((x) == LRT_COMPLETE_RESYNC_WORK) ? "LRT_COMPLETE_RESYNC_WORK": \
+	((x) == LRT_MASTER_LEAVING) ? "LRT_MASTER_LEAVING": \
+	((x) == LRT_ELECTION) ? "LRT_ELECTION": \
+	((x) == LRT_SELECTION) ? "LRT_SELECTION": "UNKNOWN")
+
 struct log_request {
 	int lr_type;
+	int lr_seq;
 	union {
 		struct {
 			uint32_t lr_starter;



             reply	other threads:[~2007-03-22 22:34 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-22 22:34 jbrassow [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-10-03 19:02 [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c jbrassow
2007-09-27 20:31 jbrassow
2007-09-26  3:15 jbrassow
2007-09-21 20:07 jbrassow
2007-09-13 15:24 jbrassow
2007-07-11 16:18 jbrassow
2007-04-26 16:55 jbrassow
2007-04-26 16:54 jbrassow
2007-04-24 20:10 jbrassow
2007-04-24 20:08 jbrassow
2007-04-10  7:13 jbrassow
2007-04-10  7:12 jbrassow
2007-04-05 21:33 jbrassow
2007-04-05 21:32 jbrassow
2007-04-03 18:23 jbrassow
2007-04-03 18:21 jbrassow
2007-03-22 22:22 jbrassow
2007-03-14  4:28 jbrassow
2007-02-26 17:38 jbrassow
2007-02-20 19:35 jbrassow
2007-02-19 16:29 jbrassow
2007-02-14 17:44 jbrassow
2007-02-02 17:22 jbrassow
2007-01-08 19:28 jbrassow
2006-12-07 18:58 jbrassow
2006-09-05 17:50 jbrassow
2006-09-05 17:48 jbrassow
2006-07-27 23:11 jbrassow
2006-07-27 23:11 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:19 jbrassow
2006-07-22 22:12 jbrassow
2006-06-29 19:49 jbrassow
2006-06-29 19:48 jbrassow
2006-06-29 19:46 jbrassow
2006-06-27 20:19 jbrassow
2006-06-15 19:48 jbrassow
2006-06-15 19:34 jbrassow
2006-06-13 16:26 jbrassow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070322223446.14408.qmail@sourceware.org \
    --to=jbrassow@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).