ocfs2-devel.oss.oracle.com archive mirror
 help / color / mirror / Atom feed
From: Junxiao Bi <junxiao.bi@oracle.com>
To: ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH 4/6] ocfs2: o2hb: add some user/debug log
Date: Wed, 20 Jan 2016 11:13:37 +0800	[thread overview]
Message-ID: <1453259619-5347-5-git-send-email-junxiao.bi@oracle.com> (raw)
In-Reply-To: <1453259619-5347-1-git-send-email-junxiao.bi@oracle.com>

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Ryan Ding <ryan.ding@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c |   39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index d5ef8dce08da..6c57fd21e597 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -292,6 +292,8 @@ struct o2hb_bio_wait_ctxt {
 	int               wc_error;
 };
 
+#define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2)
+
 enum {
 	O2HB_NEGO_TIMEOUT_MSG = 1,
 	O2HB_NEGO_APPROVE_MSG = 2,
@@ -359,7 +361,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg)
 	cancel_delayed_work(&reg->hr_nego_timeout_work);
 	/* negotiate timeout must be less than write timeout. */
 	schedule_delayed_work(&reg->hr_nego_timeout_work,
-			      msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS)/2);
+			      msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
 	memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
 }
 
@@ -393,14 +395,19 @@ static void o2hb_nego_timeout(struct work_struct *work)
 		container_of(work, struct o2hb_region,
 			     hr_nego_timeout_work.work);
 	unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
-	int master_node, i;
+	int master_node, i, ret;
 
 	o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
 	/* lowest node as master node to make negotiate decision. */
 	master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
 
 	if (master_node == o2nm_this_node()) {
-		set_bit(master_node, reg->hr_nego_node_bitmap);
+		if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
+			printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n",
+				o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
+				config_item_name(&reg->hr_item), reg->hr_dev_name);
+			set_bit(master_node, reg->hr_nego_node_bitmap);
+		}
 		if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
 				sizeof(reg->hr_nego_node_bitmap))) {
 			/* check negotiate bitmap every second to do timeout
@@ -412,6 +419,8 @@ static void o2hb_nego_timeout(struct work_struct *work)
 			return;
 		}
 
+		printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n",
+			config_item_name(&reg->hr_item), reg->hr_dev_name);
 		/* approve negotiate timeout request. */
 		o2hb_arm_timeout(reg);
 
@@ -421,13 +430,23 @@ static void o2hb_nego_timeout(struct work_struct *work)
 			if (i == master_node)
 				continue;
 
-			o2hb_send_nego_msg(reg->hr_key,
+			mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i);
+			ret = o2hb_send_nego_msg(reg->hr_key,
 					O2HB_NEGO_APPROVE_MSG, i);
+			if (ret)
+				mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n",
+					i, ret);
 		}
 	} else {
 		/* negotiate timeout with master node. */
-		o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
-			master_node);
+		printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n",
+			o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item),
+			reg->hr_dev_name, master_node);
+		ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
+				master_node);
+		if (ret)
+			mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n",
+				master_node, ret);
 	}
 }
 
@@ -438,6 +457,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
 	struct o2hb_nego_msg *nego_msg;
 
 	nego_msg = (struct o2hb_nego_msg *)msg->buf;
+	printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n",
+		nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name);
 	if (nego_msg->node_num < O2NM_MAX_NODES)
 		set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
 	else
@@ -449,7 +470,11 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
 static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
 				void **ret_data)
 {
-	o2hb_arm_timeout((struct o2hb_region *)data);
+	struct o2hb_region *reg = (struct o2hb_region *)data;
+
+	printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n",
+		config_item_name(&reg->hr_item), reg->hr_dev_name);
+	o2hb_arm_timeout(reg);
 	return 0;
 }
 
-- 
1.7.9.5

  parent reply	other threads:[~2016-01-20  3:13 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-20  3:13 [Ocfs2-devel] ocfs2: o2hb: not fence self if storage down Junxiao Bi
2016-01-20  3:13 ` [Ocfs2-devel] [PATCH 1/6] ocfs2: o2hb: add negotiate timer Junxiao Bi
2016-01-21 23:42   ` Andrew Morton
2016-01-22  3:23     ` Junxiao Bi
2016-01-22  0:56   ` Joseph Qi
2016-01-22  3:19     ` Junxiao Bi
2016-01-20  3:13 ` [Ocfs2-devel] [PATCH 2/6] ocfs2: o2hb: add NEGO_TIMEOUT message Junxiao Bi
2016-01-21 23:47   ` Andrew Morton
2016-01-22  5:12     ` Junxiao Bi
2016-01-22  5:45       ` Andrew Morton
2016-01-22  5:46         ` Junxiao Bi
2016-01-25  3:18   ` Eric Ren
2016-01-25  4:28     ` Junxiao Bi
2016-01-25  5:59       ` Eric Ren
2016-01-20  3:13 ` [Ocfs2-devel] [PATCH 3/6] ocfs2: o2hb: add NEGOTIATE_APPROVE message Junxiao Bi
2016-01-20  3:13 ` Junxiao Bi [this message]
2016-01-25  3:28   ` [Ocfs2-devel] [PATCH 4/6] ocfs2: o2hb: add some user/debug log Eric Ren
2016-01-25  4:29     ` Junxiao Bi
2016-01-25  6:00       ` Eric Ren
2016-01-20  3:13 ` [Ocfs2-devel] [PATCH 5/6] ocfs2: o2hb: don't negotiate if last hb fail Junxiao Bi
2016-01-20  3:13 ` [Ocfs2-devel] [PATCH 6/6] ocfs2: o2hb: fix hb hung time Junxiao Bi
2016-01-20  6:00 ` [Ocfs2-devel] ocfs2: o2hb: not fence self if storage down Gang He
2016-01-20  8:09   ` Junxiao Bi
2016-01-20  9:18 ` Joseph Qi
2016-01-20 13:27   ` Junxiao Bi
2016-01-21  0:46     ` Joseph Qi
2016-01-21  1:48       ` Junxiao Bi
2016-01-22  4:25         ` Joseph Qi
2016-01-22  5:08           ` Junxiao Bi
2016-01-21  8:34 ` rwxybh
2016-01-21  8:41   ` Junxiao Bi
  -- strict thread matches above, loose matches on Subject: below --
2016-05-23 21:50 [Ocfs2-devel] [patch 4/6] ocfs2: o2hb: add some user/debug log akpm at linux-foundation.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1453259619-5347-5-git-send-email-junxiao.bi@oracle.com \
    --to=junxiao.bi@oracle.com \
    --cc=ocfs2-devel@oss.oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).