ocfs2-devel.oss.oracle.com archive mirror
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
@ 2011-02-21 22:25 Sunil Mushran
  2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
  2011-02-22  9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
  0 siblings, 2 replies; 5+ messages in thread
From: Sunil Mushran @ 2011-02-21 22:25 UTC (permalink / raw)
  To: ocfs2-devel

If o2hb finds unexpected values in the heartbeat slot, it prints a message
"ERROR: Device "dm-6": another node is heartbeating in our slot!"

This patch adds more information allowing us to see the actual mismatch.
The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
0x76567), ondisk(92:0x234543, 0x76565)"

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c |   34 +++++++++++++++++-----------------
 1 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e86..d633df0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -543,25 +543,27 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
 
 /* We want to make sure that nobody is heartbeating on top of us --
  * this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
 {
-	int node_num, ret;
 	struct o2hb_disk_slot *slot;
 	struct o2hb_disk_heartbeat_block *hb_block;
 
-	node_num = o2nm_this_node();
-
-	ret = 1;
-	slot = &reg->hr_slots[node_num];
+	slot = &reg->hr_slots[o2nm_this_node()];
 	/* Don't check on our 1st timestamp */
-	if (slot->ds_last_time) {
-		hb_block = slot->ds_raw_block;
+	if (!slot->ds_last_time)
+		return;
 
-		if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
-			ret = 0;
-	}
+	hb_block = slot->ds_raw_block;
+	if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time)
+		return;
 
-	return ret;
+	mlog(ML_ERROR, "Heartbeat mismatch on \"%s\": "
+	     "expected(%u:0x%llx, 0x%llx), ondisk(%u:0x%llx, 0x%llx)\n",
+	     reg->hr_dev_name, slot->ds_node_num,
+	     (unsigned long long)slot->ds_last_generation,
+	     (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+	     (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+	     (unsigned long long)le64_to_cpu(hb_block->hb_seq));
 }
 
 static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -987,9 +989,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	/* With an up to date view of the slots, we can check that no
 	 * other node has been improperly configured to heartbeat in
 	 * our slot. */
-	if (!o2hb_check_last_timestamp(reg))
-		mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
-		     "in our slot!\n", reg->hr_dev_name);
+	o2hb_check_last_timestamp(reg);
 
 	/* fill in the proper info for our next heartbeat */
 	o2hb_prepare_block(reg, reg->hr_generation);
@@ -1003,8 +1003,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	}
 
 	i = -1;
-	while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
-
+	while((i = find_next_bit(configured_nodes,
+				 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
 		change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
 	}
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock
  2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
@ 2011-02-21 22:25 ` Sunil Mushran
  2011-02-22  9:36   ` Joel Becker
  2011-02-22  9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
  1 sibling, 1 reply; 5+ messages in thread
From: Sunil Mushran @ 2011-02-21 22:25 UTC (permalink / raw)
  To: ocfs2-devel

In dlm_query_region_handler(), move the kmalloc outside the spinlock.
This allows us to use GFP_KERNEL instead of GFP_ATOMIC.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmdomain.c |   28 ++++++++++++++++------------
 1 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7e38a07..99805d5 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -926,9 +926,10 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
 }
 
 static int dlm_match_regions(struct dlm_ctxt *dlm,
-			     struct dlm_query_region *qr)
+			     struct dlm_query_region *qr,
+			     char *local, int locallen)
 {
-	char *local = NULL, *remote = qr->qr_regions;
+	char *remote = qr->qr_regions;
 	char *l, *r;
 	int localnr, i, j, foundit;
 	int status = 0;
@@ -957,13 +958,8 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
 		r += O2HB_MAX_REGION_NAME_LEN;
 	}
 
-	local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
-	if (!local) {
-		status = -ENOMEM;
-		goto bail;
-	}
-
-	localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
+	localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN);
+	localnr = o2hb_get_all_regions(local, (u8)localnr);
 
 	/* compare local regions with remote */
 	l = local;
@@ -1012,8 +1008,6 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
 	}
 
 bail:
-	kfree(local);
-
 	return status;
 }
 
@@ -1075,6 +1069,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
 {
 	struct dlm_query_region *qr;
 	struct dlm_ctxt *dlm = NULL;
+	char *local = NULL;
 	int status = 0;
 	int locked = 0;
 
@@ -1083,6 +1078,13 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
 	mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
 	     qr->qr_domain);
 
+	/* buffer used in dlm_match_regions() */
+	local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+	if (!local) {
+		status = -ENOMEM;
+		goto bail;
+	}
+
 	status = -EINVAL;
 
 	spin_lock(&dlm_domain_lock);
@@ -1112,13 +1114,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
 		goto bail;
 	}
 
-	status = dlm_match_regions(dlm, qr);
+	status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
 
 bail:
 	if (locked)
 		spin_unlock(&dlm->spinlock);
 	spin_unlock(&dlm_domain_lock);
 
+	kfree(local);
+
 	return status;
 }
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
  2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
  2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
@ 2011-02-22  9:34 ` Joel Becker
  2011-02-23  2:21   ` Sunil Mushran
  1 sibling, 1 reply; 5+ messages in thread
From: Joel Becker @ 2011-02-22  9:34 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Feb 21, 2011 at 02:25:24PM -0800, Sunil Mushran wrote:
> If o2hb finds unexpected values in the heartbeat slot, it prints a message
> "ERROR: Device "dm-6": another node is heartbeating in our slot!"
> 
> This patch adds more information allowing us to see the actual mismatch.
> The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
> 0x76567) ondisk(92:0x234543, 0x76565)"

	Can you still say 'another node is heartbeating in our slot',
just with the detail?  The new message doesn't give a clue as to what is
happening.

Joel

-- 

"The first thing we do, let's kill all the lawyers."
                                        -Henry VI, IV:ii

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock
  2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
@ 2011-02-22  9:36   ` Joel Becker
  0 siblings, 0 replies; 5+ messages in thread
From: Joel Becker @ 2011-02-22  9:36 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Feb 21, 2011 at 02:25:25PM -0800, Sunil Mushran wrote:
> In dlm_query_region_handler(), move the kmalloc outside the spinlock.
> This allows us to use GFP_KERNEL instead of GFP_ATOMIC.
> 
> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>

	This patch is now in the merge-window branch of ocfs2.git.

Joel

-- 

"Conservative, n.  A statesman who is enamoured of existing evils,
 as distinguished from the Liberal, who wishes to replace them
 with others."
	- Ambrose Bierce, The Devil's Dictionary

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
  2011-02-22  9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
@ 2011-02-23  2:21   ` Sunil Mushran
  0 siblings, 0 replies; 5+ messages in thread
From: Sunil Mushran @ 2011-02-23  2:21 UTC (permalink / raw)
  To: ocfs2-devel

On 02/22/2011 01:34 AM, Joel Becker wrote:
> On Mon, Feb 21, 2011 at 02:25:24PM -0800, Sunil Mushran wrote:
>> If o2hb finds unexpected values in the heartbeat slot, it prints a message
>> "ERROR: Device "dm-6": another node is heartbeating in our slot!"
>>
>> This patch adds more information allowing us to see the actual mismatch.
>> The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
>> 0x76567) ondisk(92:0x234543, 0x76565)"
> 	Can you still say 'another node is heartbeating in our slot',
> just with the detail?  The new message doesn't give a clue as to what is
> happening.

OK. I'll improve on the message.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-02-23  2:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
2011-02-22  9:36   ` Joel Becker
2011-02-22  9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
2011-02-23  2:21   ` Sunil Mushran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).