* [PATCH V4] opensm: Move Error printing to MAD error call back functions.
@ 2011-12-16 17:49 Ira Weiny
[not found] ` <20111216094934.0f7bd2ee.weiny2-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Ira Weiny @ 2011-12-16 17:49 UTC (permalink / raw)
To: Alex Netes, Hal Rosenstock
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Only print the transaction IDs of timed-out MADs at the VERBOSE log level.
Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
---
libvendor/osm_vendor_ibumad.c | 27 +++------------------------
opensm/osm_helper.c | 5 +++--
opensm/osm_perfmgr.c | 4 +++-
opensm/osm_sa_mad_ctrl.c | 12 +++++++++++-
opensm/osm_sm_mad_ctrl.c | 16 ++++++++++++++--
5 files changed, 34 insertions(+), 30 deletions(-)
diff --git a/libvendor/osm_vendor_ibumad.c b/libvendor/osm_vendor_ibumad.c
index e2ebd8e..00069f5 100644
--- a/libvendor/osm_vendor_ibumad.c
+++ b/libvendor/osm_vendor_ibumad.c
@@ -327,30 +327,9 @@ static void *umad_receiver(void *p_ptr)
/* if status != 0 then we are handling recv timeout on send */
if (umad_status(p_madw->vend_wrap.umad)) {
- if (mad->mgmt_class != IB_MCLASS_SUBN_DIR) {
- /* LID routed */
- OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5410: "
- "Send completed with error -- dropping\n"
- "\t\t\tClass 0x%x, Method 0x%X, Attr 0x%X, "
- "TID 0x%" PRIx64 ", LID %u\n",
- mad->mgmt_class, mad->method,
- cl_ntoh16(mad->attr_id),
- cl_ntoh64(mad->trans_id),
- cl_ntoh16(ib_mad_addr->lid));
- } else {
- ib_smp_t *smp;
-
- /* Direct routed SMP */
- smp = (ib_smp_t *) mad;
- OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5411: "
- "DR SMP Send completed with error -- dropping\n"
- "\t\t\tMethod 0x%X, Attr 0x%X, TID 0x%" PRIx64
- ", Hop Ptr: 0x%X\n",
- mad->method, cl_ntoh16(mad->attr_id),
- cl_ntoh64(mad->trans_id), smp->hop_ptr);
- osm_dump_smp_dr_path(p_vend->p_log, smp,
- OSM_LOG_ERROR);
- }
+ OSM_LOG(p_vend->p_log, OSM_LOG_VERBOSE, "ERR 5410: "
+ "Receive Timeout on Send -- dropping "
+ "TID 0x%" PRIx64 "\n", cl_ntoh64(mad->trans_id));
if (!(p_req_madw = get_madw(p_vend, &mad->trans_id))) {
OSM_LOG(p_vend->p_log, OSM_LOG_ERROR,
diff --git a/opensm/osm_helper.c b/opensm/osm_helper.c
index f9f3d9d..b968679 100644
--- a/opensm/osm_helper.c
+++ b/opensm/osm_helper.c
@@ -2059,8 +2059,9 @@ void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp,
char buf[BUF_SIZE];
unsigned n;
- n = sprintf(buf, "Received SMP on a %u hop path: "
- "Initial path = ", p_smp->hop_count);
+ n = sprintf(buf, " DR SMP (TID 0x%" PRIx64 ") on a %u hop path: "
+ "Initial path = ",
+ cl_ntoh64(p_smp->trans_id), p_smp->hop_count);
n += sprint_uint8_arr(buf + n, sizeof(buf) - n,
p_smp->initial_path,
p_smp->hop_count + 1);
diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
index ded5a5e..fc3f74b 100644
--- a/opensm/osm_perfmgr.c
+++ b/opensm/osm_perfmgr.c
@@ -212,7 +212,9 @@ static void perfmgr_mad_send_err_callback(void *bind_context,
p_mon_node = (monitored_node_t *) p_node;
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64
- ") port %u\n", p_mon_node->name, p_mon_node->guid, port);
+ ") port %u; DLID %u, TID 0x%" PRIx64 "\n", p_mon_node->name,
+ p_mon_node->guid, port, cl_ntoh16(p_madw->mad_addr.dest_lid),
+ cl_ntoh64(p_madw->p_mad->trans_id));
if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
/* First, find the node in the monitored map */
diff --git a/opensm/osm_sa_mad_ctrl.c b/opensm/osm_sa_mad_ctrl.c
index bde88fa..4caead1 100644
--- a/opensm/osm_sa_mad_ctrl.c
+++ b/opensm/osm_sa_mad_ctrl.c
@@ -413,8 +413,18 @@ static void sa_mad_ctrl_send_err_callback(IN void *context,
Retire the original request MAD.
*/
+ OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A0A: "
+ "SA MAD completed in error (%s): "
+ "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 ", DLID %u\n",
+ ib_get_err_str(p_madw->status),
+ ib_get_sa_method_str(p_madw->p_mad->method),
+ ib_get_sa_attr_str(p_madw->p_mad->attr_id),
+ cl_ntoh32(p_madw->p_mad->attr_mod),
+ cl_ntoh64(p_madw->p_mad->trans_id),
+ cl_ntoh16(p_madw->mad_addr.dest_lid));
+
osm_dump_sa_mad(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw),
- OSM_LOG_ERROR);
+ OSM_LOG_VERBOSE);
/* sm_mad_ctrl_update_wire_stats( p_ctrl ); */
diff --git a/opensm/osm_sm_mad_ctrl.c b/opensm/osm_sm_mad_ctrl.c
index ee92c66..a3b444a 100644
--- a/opensm/osm_sm_mad_ctrl.c
+++ b/opensm/osm_sm_mad_ctrl.c
@@ -704,6 +704,7 @@ Exit:
*/
static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw)
{
+ char lidstr[8];
osm_sm_mad_ctrl_t *p_ctrl = context;
ib_api_status_t status;
ib_smp_t *p_smp;
@@ -713,13 +714,24 @@ static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw)
CL_ASSERT(p_madw);
p_smp = osm_madw_get_smp_ptr(p_madw);
+
+ if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR)
+ lidstr[0] = '\0';
+ else
+ snprintf(lidstr, 8, " DLID %u",
+ cl_ntoh16(p_madw->mad_addr.dest_lid));
+
OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: "
"MAD completed in error (%s): "
- "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "\n",
+ "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 " %s\n",
ib_get_err_str(p_madw->status),
ib_get_sm_method_str(p_smp->method),
ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod),
- cl_ntoh64(p_smp->trans_id));
+ cl_ntoh64(p_smp->trans_id),
+ lidstr);
+
+ if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR)
+ osm_dump_smp_dr_path(p_ctrl->p_log, p_smp, OSM_LOG_ERROR);
/*
If this was a SubnSet MAD, then this error might indicate a problem
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 4+ messages in thread
[parent not found: <20111216094934.0f7bd2ee.weiny2-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH V4] opensm: Move Error printing to MAD error call back functions. [not found] ` <20111216094934.0f7bd2ee.weiny2-i2BcT+NCU+M@public.gmane.org> @ 2011-12-16 19:27 ` Hal Rosenstock [not found] ` <4EEB9BAC.5040104-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> 0 siblings, 1 reply; 4+ messages in thread From: Hal Rosenstock @ 2011-12-16 19:27 UTC (permalink / raw) To: Ira Weiny; +Cc: Alex Netes, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 12/16/2011 12:49 PM, Ira Weiny wrote: > > Only print the transaction ID of timed out MAD's on VERBOSE. > > Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> > --- > libvendor/osm_vendor_ibumad.c | 27 +++------------------------ > opensm/osm_helper.c | 5 +++-- > opensm/osm_perfmgr.c | 4 +++- > opensm/osm_sa_mad_ctrl.c | 12 +++++++++++- > opensm/osm_sm_mad_ctrl.c | 16 ++++++++++++++-- > 5 files changed, 34 insertions(+), 30 deletions(-) > > diff --git a/libvendor/osm_vendor_ibumad.c b/libvendor/osm_vendor_ibumad.c > index e2ebd8e..00069f5 100644 > --- a/libvendor/osm_vendor_ibumad.c > +++ b/libvendor/osm_vendor_ibumad.c > @@ -327,30 +327,9 @@ static void *umad_receiver(void *p_ptr) > /* if status != 0 then we are handling recv timeout on send */ > if (umad_status(p_madw->vend_wrap.umad)) { > > - if (mad->mgmt_class != IB_MCLASS_SUBN_DIR) { > - /* LID routed */ > - OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5410: " > - "Send completed with error -- dropping\n" > - "\t\t\tClass 0x%x, Method 0x%X, Attr 0x%X, " > - "TID 0x%" PRIx64 ", LID %u\n", > - mad->mgmt_class, mad->method, > - cl_ntoh16(mad->attr_id), > - cl_ntoh64(mad->trans_id), > - cl_ntoh16(ib_mad_addr->lid)); > - } else { > - ib_smp_t *smp; > - > - /* Direct routed SMP */ > - smp = (ib_smp_t *) mad; > - OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5411: " > - "DR SMP Send completed with error -- dropping\n" > - "\t\t\tMethod 0x%X, Attr 0x%X, TID 0x%" PRIx64 > - ", Hop Ptr: 0x%X\n", > - mad->method, cl_ntoh16(mad->attr_id), > - 
cl_ntoh64(mad->trans_id), smp->hop_ptr); One thing I just noticed in cobbling up the other approach for comparison purposes is that the logging of the hop pointer was removed. Should that be preserved ? -- Hal > - osm_dump_smp_dr_path(p_vend->p_log, smp, > - OSM_LOG_ERROR); > - } > + OSM_LOG(p_vend->p_log, OSM_LOG_VERBOSE, "ERR 5410: " > + "Receive Timeout on Send -- dropping " > + "TID 0x%" PRIx64 "\n", cl_ntoh64(mad->trans_id)); > > if (!(p_req_madw = get_madw(p_vend, &mad->trans_id))) { > OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, > diff --git a/opensm/osm_helper.c b/opensm/osm_helper.c > index f9f3d9d..b968679 100644 > --- a/opensm/osm_helper.c > +++ b/opensm/osm_helper.c > @@ -2059,8 +2059,9 @@ void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, > char buf[BUF_SIZE]; > unsigned n; > > - n = sprintf(buf, "Received SMP on a %u hop path: " > - "Initial path = ", p_smp->hop_count); > + n = sprintf(buf, " DR SMP (TID 0x%" PRIx64 ") on a %u hop path: " > + "Initial path = ", > + cl_ntoh64(p_smp->trans_id), p_smp->hop_count); > n += sprint_uint8_arr(buf + n, sizeof(buf) - n, > p_smp->initial_path, > p_smp->hop_count + 1); > diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c > index ded5a5e..fc3f74b 100644 > --- a/opensm/osm_perfmgr.c > +++ b/opensm/osm_perfmgr.c > @@ -212,7 +212,9 @@ static void perfmgr_mad_send_err_callback(void *bind_context, > p_mon_node = (monitored_node_t *) p_node; > > OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64 > - ") port %u\n", p_mon_node->name, p_mon_node->guid, port); > + ") port %u; DLID %u, TID 0x%" PRIx64 "\n", p_mon_node->name, > + p_mon_node->guid, port, cl_ntoh16(p_madw->mad_addr.dest_lid), > + cl_ntoh64(p_madw->p_mad->trans_id)); > > if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { > /* First, find the node in the monitored map */ > diff --git a/opensm/osm_sa_mad_ctrl.c b/opensm/osm_sa_mad_ctrl.c > index bde88fa..4caead1 100644 > --- a/opensm/osm_sa_mad_ctrl.c > +++ 
b/opensm/osm_sa_mad_ctrl.c > @@ -413,8 +413,18 @@ static void sa_mad_ctrl_send_err_callback(IN void *context, > Retire the original request MAD. > */ > > + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A0A: " > + "SA MAD completed in error (%s): " > + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 ", DLID %u\n", > + ib_get_err_str(p_madw->status), > + ib_get_sa_method_str(p_madw->p_mad->method), > + ib_get_sa_attr_str(p_madw->p_mad->attr_id), > + cl_ntoh32(p_madw->p_mad->attr_mod), > + cl_ntoh64(p_madw->p_mad->trans_id), > + cl_ntoh16(p_madw->mad_addr.dest_lid)); > + > osm_dump_sa_mad(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw), > - OSM_LOG_ERROR); > + OSM_LOG_VERBOSE); > > /* sm_mad_ctrl_update_wire_stats( p_ctrl ); */ > > diff --git a/opensm/osm_sm_mad_ctrl.c b/opensm/osm_sm_mad_ctrl.c > index ee92c66..a3b444a 100644 > --- a/opensm/osm_sm_mad_ctrl.c > +++ b/opensm/osm_sm_mad_ctrl.c > @@ -704,6 +704,7 @@ Exit: > */ > static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) > { > + char lidstr[8]; > osm_sm_mad_ctrl_t *p_ctrl = context; > ib_api_status_t status; > ib_smp_t *p_smp; > @@ -713,13 +714,24 @@ static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) > CL_ASSERT(p_madw); > > p_smp = osm_madw_get_smp_ptr(p_madw); > + > + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) > + lidstr[0] = '\0'; > + else > + snprintf(lidstr, 8, " DLID %u", > + cl_ntoh16(p_madw->mad_addr.dest_lid)); > + > OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: " > "MAD completed in error (%s): " > - "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "\n", > + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 " %s\n", > ib_get_err_str(p_madw->status), > ib_get_sm_method_str(p_smp->method), > ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod), > - cl_ntoh64(p_smp->trans_id)); > + cl_ntoh64(p_smp->trans_id), > + lidstr); > + > + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) > + osm_dump_smp_dr_path(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); > > /* > If this was a SubnSet 
MAD, then this error might indicate a problem -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 4+ messages in thread
[parent not found: <4EEB9BAC.5040104-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>]
* Re: [PATCH V4] opensm: Move Error printing to MAD error call back functions. [not found] ` <4EEB9BAC.5040104-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> @ 2011-12-16 19:45 ` Ira Weiny [not found] ` <20111216114557.27cef81e.weiny2-i2BcT+NCU+M@public.gmane.org> 0 siblings, 1 reply; 4+ messages in thread From: Ira Weiny @ 2011-12-16 19:45 UTC (permalink / raw) To: Hal Rosenstock Cc: Alex Netes, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On Fri, 16 Dec 2011 11:27:40 -0800 Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > On 12/16/2011 12:49 PM, Ira Weiny wrote: > > > > Only print the transaction ID of timed out MAD's on VERBOSE. > > > > Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> > > --- [snip] > > - } else { > > - ib_smp_t *smp; > > - > > - /* Direct routed SMP */ > > - smp = (ib_smp_t *) mad; > > - OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5411: " > > - "DR SMP Send completed with error -- dropping\n" > > - "\t\t\tMethod 0x%X, Attr 0x%X, TID 0x%" PRIx64 > > - ", Hop Ptr: 0x%X\n", > > - mad->method, cl_ntoh16(mad->attr_id), > > - cl_ntoh64(mad->trans_id), smp->hop_ptr); > > One thing I just noticed in cobbling up the other approach for > comparison purposes is that the logging of the hop pointer was removed. > Should that be preserved ? Since we are printing the request MAD wouldn't the hop pointer always be 0? And while we are at it we should print the DLID/SLID since it could be combined routing. 
Ira > > -- Hal > > > - osm_dump_smp_dr_path(p_vend->p_log, smp, > > - OSM_LOG_ERROR); > > - } > > + OSM_LOG(p_vend->p_log, OSM_LOG_VERBOSE, "ERR 5410: " > > + "Receive Timeout on Send -- dropping " > > + "TID 0x%" PRIx64 "\n", cl_ntoh64(mad->trans_id)); > > > > if (!(p_req_madw = get_madw(p_vend, &mad->trans_id))) { > > OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, > > diff --git a/opensm/osm_helper.c b/opensm/osm_helper.c > > index f9f3d9d..b968679 100644 > > --- a/opensm/osm_helper.c > > +++ b/opensm/osm_helper.c > > @@ -2059,8 +2059,9 @@ void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, > > char buf[BUF_SIZE]; > > unsigned n; > > > > - n = sprintf(buf, "Received SMP on a %u hop path: " > > - "Initial path = ", p_smp->hop_count); > > + n = sprintf(buf, " DR SMP (TID 0x%" PRIx64 ") on a %u hop path: " > > + "Initial path = ", > > + cl_ntoh64(p_smp->trans_id), p_smp->hop_count); > > n += sprint_uint8_arr(buf + n, sizeof(buf) - n, > > p_smp->initial_path, > > p_smp->hop_count + 1); > > diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c > > index ded5a5e..fc3f74b 100644 > > --- a/opensm/osm_perfmgr.c > > +++ b/opensm/osm_perfmgr.c > > @@ -212,7 +212,9 @@ static void perfmgr_mad_send_err_callback(void *bind_context, > > p_mon_node = (monitored_node_t *) p_node; > > > > OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64 > > - ") port %u\n", p_mon_node->name, p_mon_node->guid, port); > > + ") port %u; DLID %u, TID 0x%" PRIx64 "\n", p_mon_node->name, > > + p_mon_node->guid, port, cl_ntoh16(p_madw->mad_addr.dest_lid), > > + cl_ntoh64(p_madw->p_mad->trans_id)); > > > > if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { > > /* First, find the node in the monitored map */ > > diff --git a/opensm/osm_sa_mad_ctrl.c b/opensm/osm_sa_mad_ctrl.c > > index bde88fa..4caead1 100644 > > --- a/opensm/osm_sa_mad_ctrl.c > > +++ b/opensm/osm_sa_mad_ctrl.c > > @@ -413,8 +413,18 @@ static void sa_mad_ctrl_send_err_callback(IN void *context, > > 
Retire the original request MAD. > > */ > > > > + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A0A: " > > + "SA MAD completed in error (%s): " > > + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 ", DLID %u\n", > > + ib_get_err_str(p_madw->status), > > + ib_get_sa_method_str(p_madw->p_mad->method), > > + ib_get_sa_attr_str(p_madw->p_mad->attr_id), > > + cl_ntoh32(p_madw->p_mad->attr_mod), > > + cl_ntoh64(p_madw->p_mad->trans_id), > > + cl_ntoh16(p_madw->mad_addr.dest_lid)); > > + > > osm_dump_sa_mad(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw), > > - OSM_LOG_ERROR); > > + OSM_LOG_VERBOSE); > > > > /* sm_mad_ctrl_update_wire_stats( p_ctrl ); */ > > > > diff --git a/opensm/osm_sm_mad_ctrl.c b/opensm/osm_sm_mad_ctrl.c > > index ee92c66..a3b444a 100644 > > --- a/opensm/osm_sm_mad_ctrl.c > > +++ b/opensm/osm_sm_mad_ctrl.c > > @@ -704,6 +704,7 @@ Exit: > > */ > > static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) > > { > > + char lidstr[8]; > > osm_sm_mad_ctrl_t *p_ctrl = context; > > ib_api_status_t status; > > ib_smp_t *p_smp; > > @@ -713,13 +714,24 @@ static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) > > CL_ASSERT(p_madw); > > > > p_smp = osm_madw_get_smp_ptr(p_madw); > > + > > + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) > > + lidstr[0] = '\0'; > > + else > > + snprintf(lidstr, 8, " DLID %u", > > + cl_ntoh16(p_madw->mad_addr.dest_lid)); > > + > > OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: " > > "MAD completed in error (%s): " > > - "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "\n", > > + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 " %s\n", > > ib_get_err_str(p_madw->status), > > ib_get_sm_method_str(p_smp->method), > > ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod), > > - cl_ntoh64(p_smp->trans_id)); > > + cl_ntoh64(p_smp->trans_id), > > + lidstr); > > + > > + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) > > + osm_dump_smp_dr_path(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); > > > > /* > > If this was a SubnSet 
MAD, then this error might indicate a problem > -- Ira Weiny Math Programmer/Computer Scientist Lawrence Livermore National Lab 925-423-8008 weiny2-i2BcT+NCU+M@public.gmane.org -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 4+ messages in thread
[parent not found: <20111216114557.27cef81e.weiny2-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH V4] opensm: Move Error printing to MAD error call back functions. [not found] ` <20111216114557.27cef81e.weiny2-i2BcT+NCU+M@public.gmane.org> @ 2011-12-16 20:19 ` Hal Rosenstock 0 siblings, 0 replies; 4+ messages in thread From: Hal Rosenstock @ 2011-12-16 20:19 UTC (permalink / raw) To: Ira Weiny; +Cc: Alex Netes, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 12/16/2011 2:45 PM, Ira Weiny wrote: > On Fri, 16 Dec 2011 11:27:40 -0800 > Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > >> On 12/16/2011 12:49 PM, Ira Weiny wrote: >>> >>> Only print the transaction ID of timed out MAD's on VERBOSE. >>> >>> Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> >>> --- > > [snip] > >>> - } else { >>> - ib_smp_t *smp; >>> - >>> - /* Direct routed SMP */ >>> - smp = (ib_smp_t *) mad; >>> - OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5411: " >>> - "DR SMP Send completed with error -- dropping\n" >>> - "\t\t\tMethod 0x%X, Attr 0x%X, TID 0x%" PRIx64 >>> - ", Hop Ptr: 0x%X\n", >>> - mad->method, cl_ntoh16(mad->attr_id), >>> - cl_ntoh64(mad->trans_id), smp->hop_ptr); >> >> One thing I just noticed in cobbling up the other approach for >> comparison purposes is that the logging of the hop pointer was removed. >> Should that be preserved ? > > Since we are printing the request MAD wouldn't the hop pointer always be 0? Yes, that should be the case but I don't recall whether the original print always showed 0 when working off the returned umad rather than the request MAD. So I guess that should be eliminated in the vendor umad approach this... > And while we are at it we should print the DLID/SLID since it could be combined routing. AFAIK OpenSM doesn't currently support combined routing (only the diags do). This is a bigger change and that change should include any changes needed for logging. 
-- Hal > Ira > >> >> -- Hal >> >>> - osm_dump_smp_dr_path(p_vend->p_log, smp, >>> - OSM_LOG_ERROR); >>> - } >>> + OSM_LOG(p_vend->p_log, OSM_LOG_VERBOSE, "ERR 5410: " >>> + "Receive Timeout on Send -- dropping " >>> + "TID 0x%" PRIx64 "\n", cl_ntoh64(mad->trans_id)); >>> >>> if (!(p_req_madw = get_madw(p_vend, &mad->trans_id))) { >>> OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, >>> diff --git a/opensm/osm_helper.c b/opensm/osm_helper.c >>> index f9f3d9d..b968679 100644 >>> --- a/opensm/osm_helper.c >>> +++ b/opensm/osm_helper.c >>> @@ -2059,8 +2059,9 @@ void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, >>> char buf[BUF_SIZE]; >>> unsigned n; >>> >>> - n = sprintf(buf, "Received SMP on a %u hop path: " >>> - "Initial path = ", p_smp->hop_count); >>> + n = sprintf(buf, " DR SMP (TID 0x%" PRIx64 ") on a %u hop path: " >>> + "Initial path = ", >>> + cl_ntoh64(p_smp->trans_id), p_smp->hop_count); >>> n += sprint_uint8_arr(buf + n, sizeof(buf) - n, >>> p_smp->initial_path, >>> p_smp->hop_count + 1); >>> diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c >>> index ded5a5e..fc3f74b 100644 >>> --- a/opensm/osm_perfmgr.c >>> +++ b/opensm/osm_perfmgr.c >>> @@ -212,7 +212,9 @@ static void perfmgr_mad_send_err_callback(void *bind_context, >>> p_mon_node = (monitored_node_t *) p_node; >>> >>> OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64 >>> - ") port %u\n", p_mon_node->name, p_mon_node->guid, port); >>> + ") port %u; DLID %u, TID 0x%" PRIx64 "\n", p_mon_node->name, >>> + p_mon_node->guid, port, cl_ntoh16(p_madw->mad_addr.dest_lid), >>> + cl_ntoh64(p_madw->p_mad->trans_id)); >>> >>> if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { >>> /* First, find the node in the monitored map */ >>> diff --git a/opensm/osm_sa_mad_ctrl.c b/opensm/osm_sa_mad_ctrl.c >>> index bde88fa..4caead1 100644 >>> --- a/opensm/osm_sa_mad_ctrl.c >>> +++ b/opensm/osm_sa_mad_ctrl.c >>> @@ -413,8 +413,18 @@ static void sa_mad_ctrl_send_err_callback(IN void 
*context, >>> Retire the original request MAD. >>> */ >>> >>> + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A0A: " >>> + "SA MAD completed in error (%s): " >>> + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 ", DLID %u\n", >>> + ib_get_err_str(p_madw->status), >>> + ib_get_sa_method_str(p_madw->p_mad->method), >>> + ib_get_sa_attr_str(p_madw->p_mad->attr_id), >>> + cl_ntoh32(p_madw->p_mad->attr_mod), >>> + cl_ntoh64(p_madw->p_mad->trans_id), >>> + cl_ntoh16(p_madw->mad_addr.dest_lid)); >>> + >>> osm_dump_sa_mad(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw), >>> - OSM_LOG_ERROR); >>> + OSM_LOG_VERBOSE); >>> >>> /* sm_mad_ctrl_update_wire_stats( p_ctrl ); */ >>> >>> diff --git a/opensm/osm_sm_mad_ctrl.c b/opensm/osm_sm_mad_ctrl.c >>> index ee92c66..a3b444a 100644 >>> --- a/opensm/osm_sm_mad_ctrl.c >>> +++ b/opensm/osm_sm_mad_ctrl.c >>> @@ -704,6 +704,7 @@ Exit: >>> */ >>> static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) >>> { >>> + char lidstr[8]; >>> osm_sm_mad_ctrl_t *p_ctrl = context; >>> ib_api_status_t status; >>> ib_smp_t *p_smp; >>> @@ -713,13 +714,24 @@ static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw) >>> CL_ASSERT(p_madw); >>> >>> p_smp = osm_madw_get_smp_ptr(p_madw); >>> + >>> + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) >>> + lidstr[0] = '\0'; >>> + else >>> + snprintf(lidstr, 8, " DLID %u", >>> + cl_ntoh16(p_madw->mad_addr.dest_lid)); >>> + >>> OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: " >>> "MAD completed in error (%s): " >>> - "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "\n", >>> + "%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 " %s\n", >>> ib_get_err_str(p_madw->status), >>> ib_get_sm_method_str(p_smp->method), >>> ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod), >>> - cl_ntoh64(p_smp->trans_id)); >>> + cl_ntoh64(p_smp->trans_id), >>> + lidstr); >>> + >>> + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) >>> + osm_dump_smp_dr_path(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); >>> >>> /* >>> If this 
was a SubnSet MAD, then this error might indicate a problem >> > > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-12-16 20:19 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-16 17:49 [PATCH V4] opensm: Move Error printing to MAD error call back functions Ira Weiny
[not found] ` <20111216094934.0f7bd2ee.weiny2-i2BcT+NCU+M@public.gmane.org>
2011-12-16 19:27 ` Hal Rosenstock
[not found] ` <4EEB9BAC.5040104-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2011-12-16 19:45 ` Ira Weiny
[not found] ` <20111216114557.27cef81e.weiny2-i2BcT+NCU+M@public.gmane.org>
2011-12-16 20:19 ` Hal Rosenstock
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.