From: Sunil Mushran <sunil.mushran@oracle.com>
To: ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH] ocfs2: print node # when tcp fails -v4
Date: Tue, 30 Mar 2010 15:05:55 -0700 [thread overview]
Message-ID: <4BB275C3.5050707@oracle.com> (raw)
In-Reply-To: <201003300411.o2U3l0gx029151@acsinet15.oracle.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Wengang Wang wrote:
> #I am resending the patch as v4 as a reminder. I have also cleaned up some
> #problems that checkpatch.pl pointed out.
>
> This patch logs the number of the peer node whenever sending a TCP message to
> that node fails. This helps with debugging.
>
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
> fs/ocfs2/dlm/dlmast.c | 4 +++-
> fs/ocfs2/dlm/dlmconvert.c | 4 +++-
> fs/ocfs2/dlm/dlmdomain.c | 19 +++++++++++++------
> fs/ocfs2/dlm/dlmlock.c | 4 +++-
> fs/ocfs2/dlm/dlmmaster.c | 12 +++++++++---
> fs/ocfs2/dlm/dlmrecovery.c | 27 ++++++++++++++++++---------
> fs/ocfs2/dlm/dlmunlock.c | 3 ++-
> 7 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
> index dccc439..390a887 100644
> --- a/fs/ocfs2/dlm/dlmast.c
> +++ b/fs/ocfs2/dlm/dlmast.c
> @@ -453,7 +453,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
> ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
> lock->ml.node, &status);
> if (ret < 0)
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
> + lock->ml.node);
> else {
> if (status == DLM_RECOVERING) {
> mlog(ML_ERROR, "sent AST to node %u, it thinks this "
> diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
> index f283bce..3028d05 100644
> --- a/fs/ocfs2/dlm/dlmconvert.c
> +++ b/fs/ocfs2/dlm/dlmconvert.c
> @@ -391,7 +391,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
> } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
> dlm_error(ret);
> } else {
> - mlog_errno(tmpret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
> + res->owner);
> if (dlm_is_host_down(tmpret)) {
> /* instead of logging the same network error over
> * and over, sleep here and wait for the heartbeat
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index 988c905..eb50be0 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
> status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
> &leave_msg, sizeof(leave_msg), node,
> NULL);
> -
> + if (status < 0)
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
> mlog(0, "status return %d from o2net_send_message\n", status);
>
> return status;
> @@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
> &cancel_msg, sizeof(cancel_msg), node,
> NULL);
> if (status < 0) {
> - mlog_errno(status);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
> + node);
> goto bail;
> }
>
> @@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
> byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
>
> status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
> - sizeof(join_msg), node,
> - &join_resp);
> + sizeof(join_msg), node, &join_resp);
> if (status < 0 && status != -ENOPROTOOPT) {
> - mlog_errno(status);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
> + node);
> goto bail;
> }
> dlm_query_join_wire_to_packet(join_resp, &packet);
> @@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
> &assert_msg, sizeof(assert_msg), node,
> NULL);
> if (status < 0)
> - mlog_errno(status);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
> + node);
>
> return status;
> }
> diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
> index 7333377..f1fba2a 100644
> --- a/fs/ocfs2/dlm/dlmlock.c
> +++ b/fs/ocfs2/dlm/dlmlock.c
> @@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
> BUG();
> }
> } else {
> - mlog_errno(tmpret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
> + res->owner);
> if (dlm_is_host_down(tmpret)) {
> ret = DLM_RECOVERING;
> mlog(0, "node %u died so returning DLM_RECOVERING "
> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
> index a659606..3114de2 100644
> --- a/fs/ocfs2/dlm/dlmmaster.c
> +++ b/fs/ocfs2/dlm/dlmmaster.c
> @@ -1666,7 +1666,9 @@ again:
> tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
> &assert, sizeof(assert), to, &r);
> if (tmpret < 0) {
> - mlog(0, "assert_master returned %d!\n", tmpret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", tmpret,
> + DLM_ASSERT_MASTER_MSG, dlm->key, to);
> if (!dlm_is_host_down(tmpret)) {
> mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
> BUG();
> @@ -2207,7 +2209,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
> ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
> &deref, sizeof(deref), res->owner, &r);
> if (ret < 0)
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
> + res->owner);
> else if (r < 0) {
> /* BAD. other node says I did not have a ref. */
> mlog(ML_ERROR,"while dropping ref on %s:%.*s "
> @@ -2977,7 +2981,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
> &migrate, sizeof(migrate), nodenum,
> &status);
> if (ret < 0) {
> - mlog(0, "migrate_request returned %d!\n", ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
> + dlm->key, nodenum);
> if (!dlm_is_host_down(ret)) {
> mlog(ML_ERROR, "unhandled error=%d!\n", ret);
> BUG();
> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
> index b4f99de..f8b75ce 100644
> --- a/fs/ocfs2/dlm/dlmrecovery.c
> +++ b/fs/ocfs2/dlm/dlmrecovery.c
> @@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
>
> /* negative status is handled by caller */
> if (ret < 0)
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
> + dlm->key, request_from);
>
> // return from here, then
> // sleep until all received or error
> @@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
> ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
> sizeof(done_msg), send_to, &tmpret);
> if (ret < 0) {
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
> + dlm->key, send_to);
> if (!dlm_is_host_down(ret)) {
> - mlog_errno(ret);
> - mlog(ML_ERROR, "%s: unknown error sending data-done "
> - "to %u\n", dlm->name, send_to);
> BUG();
> }
> } else
> @@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
> if (ret < 0) {
> /* XXX: negative status is not handled.
> * this will end up killing this node. */
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
> + dlm->key, send_to);
> } else {
> /* might get an -ENOMEM back here */
> ret = status;
> @@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
> &req, sizeof(req), nodenum, &status);
> /* XXX: negative status not handled properly here. */
> if (ret < 0)
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
> + dlm->key, nodenum);
> else {
> BUG_ON(status < 0);
> BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
> @@ -2640,7 +2646,7 @@ retry:
> if (dlm_is_host_down(ret)) {
> /* node is down. not involved in recovery
> * so just keep going */
> - mlog(0, "%s: node %u was down when sending "
> + mlog(ML_NOTICE, "%s: node %u was down when sending "
> "begin reco msg (%d)\n", dlm->name, nodenum, ret);
> ret = 0;
> }
> @@ -2660,11 +2666,12 @@ retry:
> }
> if (ret < 0) {
> struct dlm_lock_resource *res;
> +
> /* this is now a serious problem, possibly ENOMEM
> * in the network stack. must retry */
> mlog_errno(ret);
> mlog(ML_ERROR, "begin reco of dlm %s to node %u "
> - " returned %d\n", dlm->name, nodenum, ret);
> + "returned %d\n", dlm->name, nodenum, ret);
> res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
> DLM_RECOVERY_LOCK_NAME_LEN);
> if (res) {
> @@ -2789,7 +2796,9 @@ stage2:
> if (ret >= 0)
> ret = status;
> if (ret < 0) {
> - mlog_errno(ret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key "
> + "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
> + dlm->key, nodenum);
> if (dlm_is_host_down(ret)) {
> /* this has no effect on this recovery
> * session, so set the status to zero to
> diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
> index 49e29ec..2c1f306 100644
> --- a/fs/ocfs2/dlm/dlmunlock.c
> +++ b/fs/ocfs2/dlm/dlmunlock.c
> @@ -355,7 +355,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
> mlog(0, "master was in-progress. retry\n");
> ret = status;
> } else {
> - mlog_errno(tmpret);
> + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
> + "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
> if (dlm_is_host_down(tmpret)) {
> /* NOTE: this seems strange, but it is what we want.
> * when the master goes down during a cancel or
>
next prev parent reply other threads:[~2010-03-30 22:05 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-30 4:09 [Ocfs2-devel] [PATCH] ocfs2: print node # when tcp fails -v4 Wengang Wang
2010-03-30 22:05 ` Sunil Mushran [this message]
2010-04-07 1:17 ` Joel Becker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BB275C3.5050707@oracle.com \
--to=sunil.mushran@oracle.com \
--cc=ocfs2-devel@oss.oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.