All of lore.kernel.org
 help / color / mirror / Atom feed
From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror/src cluster.c functions.c loca ...
Date: 4 Feb 2008 18:27:22 -0000	[thread overview]
Message-ID: <20080204182722.19913.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	jbrassow at sourceware.org	2008-02-04 18:27:20

Modified files:
	cmirror/src    : cluster.c functions.c local.c logging.h 

Log message:
	- change priority of some log statements
	- fix potential out-of-bounds memory access in the SHORT_UUID macro (strings of 8 chars or fewer)
	- add reference counting to log... fixes some issues with mirror conversion
	- plug a memory leak... likely related to bug 383291

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.11&r2=1.1.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.10&r2=1.1.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.10&r2=1.1.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/logging.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.4&r2=1.1.2.5

--- cluster/cmirror/src/Attic/cluster.c	2008/01/23 21:21:06	1.1.2.11
+++ cluster/cmirror/src/Attic/cluster.c	2008/02/04 18:27:20	1.1.2.12
@@ -335,7 +335,7 @@
 	}
 
 	if (rv == SA_AIS_ERR_EXIST) {
-		LOG_ERROR("export_checkpoint: checkpoint already exists");
+		LOG_DBG("export_checkpoint: checkpoint already exists");
 		EXIT();
 		return -EEXIST;
 	}
@@ -361,7 +361,7 @@
 	}
 
 	if (rv == SA_AIS_ERR_EXIST) {
-		LOG_ERROR("export_checkpoint: sync checkpoint section already exists");
+		LOG_DBG("export_checkpoint: sync checkpoint section already exists");
 		EXIT();
 		return -EEXIST;
 	}
@@ -588,8 +588,8 @@
 		}
 
 		for (cp = entry->checkpoint_list; cp;) {
-			LOG_ERROR("Checkpoint data available for node %u",
-				  cp->requester);
+			LOG_DBG("[%s] Checkpoint data available for node %u",
+				SHORT_UUID(entry->name.value), cp->requester);
 
 			/*
 			 * FIXME: Check return code.  Could send failure
@@ -747,24 +747,25 @@
 
 	ENTER();
 
-	LOG_PRINT("****** CPG config callback ****************");
+	LOG_DBG("****** CPG config callback **[%s]**",
+		SHORT_UUID(gname->value));
 
-	LOG_PRINT("* JOINING (%d):", joined_list_entries);
+	LOG_DBG("* JOINING (%d):", joined_list_entries);
 	for (i = 0; i < joined_list_entries; i++)
-		LOG_PRINT("*   nodeid: %d, pid: %d",
-			  joined_list[i].nodeid, joined_list[i].pid);
+		LOG_DBG("*   nodeid: %d, pid: %d",
+			joined_list[i].nodeid, joined_list[i].pid);
 
-	LOG_PRINT("* MEMBERS (%d):", member_list_entries);
+	LOG_DBG("* MEMBERS (%d):", member_list_entries);
 	for (i = 0; i < member_list_entries; i++)
-		LOG_PRINT("*   nodeid: %d, pid: %d",
-			  member_list[i].nodeid, member_list[i].pid);
+		LOG_DBG("*   nodeid: %d, pid: %d",
+			member_list[i].nodeid, member_list[i].pid);
 
-	LOG_PRINT("* LEAVING (%d):", left_list_entries);
+	LOG_DBG("* LEAVING (%d):", left_list_entries);
 	for (i = 0; i < left_list_entries; i++)
-		LOG_PRINT("*   nodeid: %d, pid: %d",
-			  left_list[i].nodeid, left_list[i].pid);	
+		LOG_DBG("*   nodeid: %d, pid: %d",
+			left_list[i].nodeid, left_list[i].pid);	
 
-	LOG_PRINT("*****************************************");
+	LOG_DBG("*****************************************");
 
 	list_for_each_entry_safe(match, tmp, &clog_cpg_list, list) {
 		LOG_DBG("Given handle: %llu", (unsigned long long)handle);
--- cluster/cmirror/src/Attic/functions.c	2008/01/23 21:21:06	1.1.2.10
+++ cluster/cmirror/src/Attic/functions.c	2008/02/04 18:27:20	1.1.2.11
@@ -43,7 +43,9 @@
 
 struct log_c {
 	struct list_head list;
+
 	char uuid[DM_UUID_LEN];
+	uint32_t ref_count;
 
 	int touched;
 	uint32_t region_size;
@@ -350,6 +352,7 @@
 	uint64_t region_count;
 	uint32_t bitset_size;
 	struct log_c *lc = NULL;
+	struct log_c *dup;
 	enum sync sync = DEFAULTSYNC;
 
 	int disk_log = 0;
@@ -422,9 +425,19 @@
 	lc->sync = sync;
 	lc->sync_search = 0;
 	lc->recovering_region = (uint64_t)-1;
-	strncpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN);
 	lc->disk_fd = -1;
 	lc->log_dev_failed = 0;
+	lc->ref_count = 1;
+	strncpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN);
+
+	if ((dup = get_log(lc->uuid)) ||
+	    (dup = get_pending_log(lc->uuid))) {
+		LOG_PRINT("[%s] Inc reference count on cluster log",
+			  SHORT_UUID(lc->uuid));
+		free(lc);
+		dup->ref_count++;
+		return 0;
+	}
 
 	INIT_LIST_HEAD(&lc->mark_list);
 
@@ -561,7 +574,8 @@
 	if (r)
 		LOG_ERROR("Failed to create cluster log (%s)", tfr->uuid);
 	else
-		LOG_PRINT("Cluster log created (%s)", tfr->uuid);
+		LOG_PRINT("[%s] Cluster log created",
+			  SHORT_UUID(tfr->uuid));
 
 	return r;
 }
@@ -575,23 +589,31 @@
 {
 	struct log_c *lc = get_log(tfr->uuid);
 
-	if (!lc) {
-		/* Is the log in the pending list? */
-		lc = get_pending_log(tfr->uuid);
-		if (!lc) {
-			LOG_ERROR("clog_dtr called on log that is not official or pending");
-			return -EINVAL;
-		}
-	} else {
-		LOG_DBG("[%s] clog_dtr: leaving CPG", SHORT_UUID(lc->uuid));
+	if (lc) {
 		/*
-		 * If postsuspend had done the destroy_cluster_cpg,
-		 * the log context would be in the pending list
+		 * The log should not be on the official list.  There
+		 * should have been a suspend first.
 		 */
-		destroy_cluster_cpg(tfr->uuid);
+		lc->ref_count--;
+		if (!lc->ref_count) {
+			LOG_ERROR("[%s] DTR before SUS: leaving CPG",
+				  SHORT_UUID(tfr->uuid));
+			destroy_cluster_cpg(tfr->uuid);
+		}
+	} else if ((lc = get_pending_log(tfr->uuid))) {
+		lc->ref_count--;
+	} else {
+		LOG_ERROR("clog_dtr called on log that is not official or pending");
+		return -EINVAL;
+	}
+
+	if (lc->ref_count) {
+		LOG_PRINT("[%s] Dec reference count on cluster log",
+			  SHORT_UUID(lc->uuid));
+		return 0;
 	}
 
-	LOG_PRINT("Cluster log removed (%s)", lc->uuid);
+	LOG_PRINT("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
 
 	list_del_init(&lc->list);
 	if (lc->disk_fd != -1)
@@ -638,7 +660,7 @@
 	if (!lc)
 		return -EINVAL;
 
-	LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
+	LOG_PRINT("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
 	destroy_cluster_cpg(tfr->uuid);
 
 	return 0;
@@ -656,7 +678,7 @@
 	if (!lc)
 		return -EINVAL;
 
-	LOG_DBG("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
+	LOG_PRINT("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
 	lc->resume_override = 0;
 
 	/* move log to pending list */
@@ -686,10 +708,12 @@
 
 	switch (lc->resume_override) {
 	case 1000:
-		LOG_ERROR("ERROR:: Additional resume issued before suspend");
+		LOG_ERROR("[%s] ERROR:: Additional resume issued before suspend",
+			  SHORT_UUID(tfr->uuid));
 		return 0;
 	case 0:
-		LOG_PRINT("Master resume: reading disk log");
+		LOG_PRINT("[%s] Master resume: reading disk log",
+			  SHORT_UUID(lc->uuid));
 		lc->resume_override = 1000;
 		break;
 	case 1:
@@ -699,8 +723,8 @@
 		LOG_ERROR("Error:: partial bit loading (just clean_bits)");
 		return -EINVAL;
 	case 3:
-		LOG_DBG("[%s] Non-master resume: bits pre-loaded",
-			SHORT_UUID(lc->uuid));
+		LOG_PRINT("[%s] Non-master resume: bits pre-loaded",
+			  SHORT_UUID(lc->uuid));
 		lc->resume_override = 1000;
 		lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
 		LOG_DBG("[%s] Initial sync_count = %llu",
@@ -1232,6 +1256,9 @@
 	struct log_c *lc = get_log(tfr->uuid);
 
 	if (!lc)
+		lc = get_pending_log(tfr->uuid);
+
+	if (!lc)
 		return -EINVAL;
 
 	if (lc->disk_fd == -1)
@@ -1287,6 +1314,9 @@
 	struct log_c *lc = get_log(tfr->uuid);
 
 	if (!lc)
+		lc = get_pending_log(tfr->uuid);
+
+	if (!lc)
 		return -EINVAL;
 
 	if (lc->disk_fd == -1)
--- cluster/cmirror/src/Attic/local.c	2008/01/25 16:24:47	1.1.2.10
+++ cluster/cmirror/src/Attic/local.c	2008/02/04 18:27:20	1.1.2.11
@@ -16,8 +16,6 @@
 #include "local.h"
 
 static int cn_fd;  /* Connector (netlink) socket fd */
-static int request_array[20];  /* for request counting (debugging) */
-static int cluster_array[20];  /* for request counting (debugging) */
 
 static int kernel_recv_helper(void *data, int in_size)
 {
@@ -145,7 +143,6 @@
  */
 static int do_local_work(void *data)
 {
-	static int request_count = 0;
 	int r, i;
 	struct clog_tfr *tfr = NULL;
 
@@ -154,9 +151,6 @@
 	if (r)
 		return r;
 
-	request_array[tfr->request_type]++;
-	request_count++;
-
 	LOG_DBG("Request from kernel recieved [%s/%s/%llu]",
 		RQ_TYPE(tfr->request_type), SHORT_UUID(tfr->uuid),
 		(unsigned long long)tfr->seq);
@@ -208,13 +202,22 @@
 		}
 		/* ELSE, fall through to default */
 	default:
-		cluster_array[tfr->request_type]++;
-		/* Add before send_to_cluster, so cluster code can find it */
-		queue_add_tail(tfr, cluster_queue);
 		r = cluster_send(tfr);
-		if (r)
-			LOG_ERROR("Unable to send request to cluster: %s",
-				  strerror(-r));
+		if (r) {
+			LOG_ERROR("[%s] Unable to send %s to cluster: %s",
+				  SHORT_UUID(tfr->uuid),
+				  RQ_TYPE(tfr->request_type), strerror(-r));
+			tfr->error = r;
+			kernel_send(tfr);
+		} else {
+			/*
+			 * If this was multi-threaded, we would have to
+			 * add the 'tfr' to the queue before doing
+			 * the cluster_send
+			 */
+			queue_add_tail(tfr, cluster_queue);
+		}
+
 		break;
 	}
 
@@ -223,14 +226,6 @@
 		tfr->error = r;
 	}
 
-	if (!(request_count % 10000)) {
-		LOG_PRINT("Total requests (%d):", request_count);
-		for (i = 0; i < 20; i++)
-			LOG_PRINT("  %s: %d", RQ_TYPE(i), request_array[i]);
-		LOG_PRINT("Cluster-bound requests:");
-		for (i = 0; i < 20; i++)
-			LOG_PRINT("  %s: %d", RQ_TYPE(i), cluster_array[i]);
-	}
 	EXIT();
 	return r;
 }
@@ -303,9 +298,6 @@
 
 	ENTER();
 
-	memset(request_array, 0, sizeof(int)*20);
-	memset(cluster_array, 0, sizeof(int)*20);
-
 	cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
 	if (cn_fd < 0) {
 		EXIT();
--- cluster/cmirror/src/Attic/logging.h	2008/01/18 17:11:07	1.1.2.4
+++ cluster/cmirror/src/Attic/logging.h	2008/02/04 18:27:20	1.1.2.5
@@ -29,7 +29,7 @@
 #endif
 
 /* SHORT_UUID - print last 8 chars of a string */
-#define SHORT_UUID(x) ((x) + (strlen(x) - 8))
+#define SHORT_UUID(x) (strlen(x) > 8) ? ((x) + (strlen(x) - 8)) : (x)
 
 extern int log_tabbing;
 extern int log_is_open;



             reply	other threads:[~2008-02-04 18:27 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-04 18:27 jbrassow [this message]
  -- strict thread matches above, loose matches on Subject: below --
2008-01-18 17:11 [Cluster-devel] cluster/cmirror/src cluster.c functions.c loca jbrassow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080204182722.19913.qmail@sourceware.org \
    --to=jbrassow@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.