* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
@ 2011-02-21 22:25 Sunil Mushran
2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
2011-02-22 9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
0 siblings, 2 replies; 5+ messages in thread
From: Sunil Mushran @ 2011-02-21 22:25 UTC (permalink / raw)
To: ocfs2-devel
If o2hb finds unexpected values in the heartbeat slot, it prints a message
"ERROR: Device "dm-6": another node is heartbeating in our slot!"
This patch adds more information allowing us to see the actual mismatch.
The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
0x76567) ondisk(92:0x234543, 0x76565)"
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
fs/ocfs2/cluster/heartbeat.c | 34 +++++++++++++++++-----------------
1 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e86..d633df0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -543,25 +543,27 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
/* We want to make sure that nobody is heartbeating on top of us --
* this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
{
- int node_num, ret;
struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block;
- node_num = o2nm_this_node();
-
- ret = 1;
- slot = &reg->hr_slots[node_num];
+ slot = &reg->hr_slots[o2nm_this_node()];
/* Don't check on our 1st timestamp */
- if (slot->ds_last_time) {
- hb_block = slot->ds_raw_block;
+ if (!slot->ds_last_time)
+ return;
- if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
- ret = 0;
- }
+ hb_block = slot->ds_raw_block;
+ if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time)
+ return;
- return ret;
+ mlog(ML_ERROR, "Heartbeat mismatch on \"%s\": "
+ "expected(%u:0x%llx, 0x%llx), ondisk(%u:0x%llx, 0x%llx)\n",
+ reg->hr_dev_name, slot->ds_node_num,
+ (unsigned long long)slot->ds_last_generation,
+ (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+ (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+ (unsigned long long)le64_to_cpu(hb_block->hb_seq));
}
static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -987,9 +989,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
/* With an up to date view of the slots, we can check that no
* other node has been improperly configured to heartbeat in
* our slot. */
- if (!o2hb_check_last_timestamp(reg))
- mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
- "in our slot!\n", reg->hr_dev_name);
+ o2hb_check_last_timestamp(reg);
/* fill in the proper info for our next heartbeat */
o2hb_prepare_block(reg, reg->hr_generation);
@@ -1003,8 +1003,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
}
i = -1;
- while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
-
+ while((i = find_next_bit(configured_nodes,
+ O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
}
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock
2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
@ 2011-02-21 22:25 ` Sunil Mushran
2011-02-22 9:36 ` Joel Becker
2011-02-22 9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
1 sibling, 1 reply; 5+ messages in thread
From: Sunil Mushran @ 2011-02-21 22:25 UTC (permalink / raw)
To: ocfs2-devel
In dlm_query_region_handler(), move the kmalloc outside the spinlock.
This allows us to use GFP_KERNEL instead of GFP_ATOMIC.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
fs/ocfs2/dlm/dlmdomain.c | 28 ++++++++++++++++------------
1 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7e38a07..99805d5 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -926,9 +926,10 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
}
static int dlm_match_regions(struct dlm_ctxt *dlm,
- struct dlm_query_region *qr)
+ struct dlm_query_region *qr,
+ char *local, int locallen)
{
- char *local = NULL, *remote = qr->qr_regions;
+ char *remote = qr->qr_regions;
char *l, *r;
int localnr, i, j, foundit;
int status = 0;
@@ -957,13 +958,8 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
r += O2HB_MAX_REGION_NAME_LEN;
}
- local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
- if (!local) {
- status = -ENOMEM;
- goto bail;
- }
-
- localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
+ localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN);
+ localnr = o2hb_get_all_regions(local, (u8)localnr);
/* compare local regions with remote */
l = local;
@@ -1012,8 +1008,6 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
}
bail:
- kfree(local);
-
return status;
}
@@ -1075,6 +1069,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
{
struct dlm_query_region *qr;
struct dlm_ctxt *dlm = NULL;
+ char *local = NULL;
int status = 0;
int locked = 0;
@@ -1083,6 +1078,13 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
qr->qr_domain);
+ /* buffer used in dlm_match_regions() */
+ local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+ if (!local) {
+ status = -ENOMEM;
+ goto bail;
+ }
+
status = -EINVAL;
spin_lock(&dlm_domain_lock);
@@ -1112,13 +1114,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
goto bail;
}
- status = dlm_match_regions(dlm, qr);
+ status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
bail:
if (locked)
spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
+ kfree(local);
+
return status;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
@ 2011-02-22 9:34 ` Joel Becker
2011-02-23 2:21 ` Sunil Mushran
1 sibling, 1 reply; 5+ messages in thread
From: Joel Becker @ 2011-02-22 9:34 UTC (permalink / raw)
To: ocfs2-devel
On Mon, Feb 21, 2011 at 02:25:24PM -0800, Sunil Mushran wrote:
> If o2hb finds unexpected values in the heartbeat slot, it prints a message
> "ERROR: Device "dm-6": another node is heartbeating in our slot!"
>
> This patch adds more information allowing us to see the actual mismatch.
> The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
> 0x76567) ondisk(92:0x234543, 0x76565)"
Can you still say 'another node is heartbeating in our slot',
just with the detail? The new message doesn't give a clue as to what is
happening.
Joel
--
"The first thing we do, let's kill all the lawyers."
-Henry VI, IV:ii
http://www.jlbec.org/
jlbec at evilplan.org
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock
2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
@ 2011-02-22 9:36 ` Joel Becker
0 siblings, 0 replies; 5+ messages in thread
From: Joel Becker @ 2011-02-22 9:36 UTC (permalink / raw)
To: ocfs2-devel
On Mon, Feb 21, 2011 at 02:25:25PM -0800, Sunil Mushran wrote:
> In dlm_query_region_handler(), move the kmalloc outside the spinlock.
> This allows us to use GFP_KERNEL instead of GFP_ATOMIC.
>
> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
This patch is now in the merge-window branch of ocfs2.git.
Joel
--
"Conservative, n. A statesman who is enamoured of existing evils,
as distinguished from the Liberal, who wishes to replace them
with others."
- Ambrose Bierce, The Devil's Dictionary
http://www.jlbec.org/
jlbec at evilplan.org
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
2011-02-22 9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
@ 2011-02-23 2:21 ` Sunil Mushran
0 siblings, 0 replies; 5+ messages in thread
From: Sunil Mushran @ 2011-02-23 2:21 UTC (permalink / raw)
To: ocfs2-devel
On 02/22/2011 01:34 AM, Joel Becker wrote:
> On Mon, Feb 21, 2011 at 02:25:24PM -0800, Sunil Mushran wrote:
>> If o2hb finds unexpected values in the heartbeat slot, it prints a message
>> "ERROR: Device "dm-6": another node is heartbeating in our slot!"
>>
>> This patch adds more information allowing us to see the actual mismatch.
>> The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543,
>> 0x76567) ondisk(92:0x234543, 0x76565)"
> Can you still say 'another node is heartbeating in our slot',
> just with the detail? The new message doesn't give a clue as to what is
> happening.
OK. I'll improve on the message.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-02-23 2:21 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-21 22:25 [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Sunil Mushran
2011-02-21 22:25 ` [Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock Sunil Mushran
2011-02-22 9:36 ` Joel Becker
2011-02-22 9:34 ` [Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved Joel Becker
2011-02-23 2:21 ` Sunil Mushran
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).