* [PATCH] opensm/osm_perfmgr.c: Output remote port on perfmgr error counter log messages
@ 2014-12-08 18:39 Albert Chu
[not found] ` <1418063952.20566.44.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
0 siblings, 1 reply; 2+ messages in thread
From: Albert Chu @ 2014-12-08 18:39 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Outputting the remote node and port aids in servicing the fabric more
quickly for system administrators. In addition, it aids in fabric
monitoring efforts that scan the log.
Example output before this patch:
perfmgr_log_errors: ERR 543C: VL15Dropped : 17 : node "ibcore1 L101" (NodeGUID: 0x66a02e8001313) : port 11
Example output with this patch:
perfmgr_log_errors: ERR 543C: VL15Dropped : 17 : node "ibcore1 L101" (NodeGUID: 0x66a02e8001313) : port 11 connected to "hype355 qib0" (NodeGUID: 0x40ed770000751100) : port 1
Signed-off-by: Albert L. Chu <chu11-i2BcT+NCU+M@public.gmane.org>
---
include/opensm/osm_perfmgr.h | 5 +++
opensm/osm_perfmgr.c | 59 +++++++++++++++++++++++++++++++++--------
2 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
index 44a278d..ec12eb6 100644
--- a/include/opensm/osm_perfmgr.h
+++ b/include/opensm/osm_perfmgr.h
@@ -105,6 +105,11 @@ typedef struct monitored_port {
/* ClassPortInfo fields */
boolean_t cpi_valid;
ib_net16_t cap_mask;
+ /* Remote end connected to */
+ boolean_t remote_valid;
+ uint64_t remote_guid;
+ char *remote_name;
+ uint8_t remote_port;
} monitored_port_t;
/* Node to store information about nodes being monitored */
diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
index d3fa1f7..4ab654b 100644
--- a/opensm/osm_perfmgr.c
+++ b/opensm/osm_perfmgr.c
@@ -144,6 +144,7 @@ static void remove_marked_nodes(osm_perfmgr_t * pm)
{
while (pm->remove_list) {
monitored_node_t *next = pm->remove_list->next;
+ int port;
cl_qmap_remove_item(&pm->monitored_map,
(cl_map_item_t *) (pm->remove_list));
@@ -155,6 +156,14 @@ static void remove_marked_nodes(osm_perfmgr_t * pm)
if (pm->remove_list->name)
free(pm->remove_list->name);
+
+ for (port = pm->remove_list->esp0 ? 0 : 1;
+ port < pm->remove_list->num_ports;
+ port++) {
+ if (pm->remove_list->port[port].remote_name)
+ free(pm->remove_list->port[port].remote_name);
+ }
+
free(pm->remove_list);
pm->remove_list = next;
}
@@ -554,11 +563,24 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
ib_switch_info_is_enhanced_port0(&node->sw->
switch_info));
for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
- mon_node->port[port].orig_lid = 0;
- mon_node->port[port].valid = FALSE;
- if (osm_physp_is_valid(&node->physp_table[port])) {
- mon_node->port[port].orig_lid = get_base_lid(node, port);
- mon_node->port[port].valid = TRUE;
+ monitored_port_t *mon_port = &mon_node->port[port];
+ osm_physp_t *p_physp = &node->physp_table[port];
+ osm_physp_t *p_remote_physp = p_physp->p_remote_physp;
+
+ mon_port->orig_lid = 0;
+ mon_port->valid = FALSE;
+ if (osm_physp_is_valid(p_physp)) {
+ mon_port->orig_lid = get_base_lid(node, port);
+ mon_port->valid = TRUE;
+ }
+ mon_port->remote_valid = FALSE;
+ mon_port->remote_name = NULL;
+ if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) {
+ osm_node_t *p_remote_node = p_remote_physp->p_node;
+ mon_port->remote_valid = TRUE;
+ mon_port->remote_guid = p_remote_node->node_info.node_guid;
+ mon_port->remote_name = strdup(p_remote_node->print_desc);
+ mon_port->remote_port = p_remote_physp->port_num;
}
}
@@ -1429,13 +1451,26 @@ static void perfmgr_log_errors(osm_perfmgr_t * pm,
}
#define LOG_ERR_CNT(errname, errnum, counter_name) \
- if (reading->counter_name > prev_read.counter_name) \
- OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
- "%s : %" PRIu64 " : node " \
- "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
- errnum, errname, \
- reading->counter_name - prev_read.counter_name, \
- mon_node->name, mon_node->guid, port);
+ if (reading->counter_name > prev_read.counter_name) { \
+ if (mon_node->port[port].remote_valid == TRUE) \
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
+ "%s : %" PRIu64 " : node " \
+ "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \
+ "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
+ errnum, errname, \
+ reading->counter_name - prev_read.counter_name, \
+ mon_node->name, mon_node->guid, port, \
+ mon_node->port[port].remote_name, \
+ mon_node->port[port].remote_guid, \
+ mon_node->port[port].remote_port); \
+ else \
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
+ "%s : %" PRIu64 " : node " \
+ "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
+ errnum, errname, \
+ reading->counter_name - prev_read.counter_name, \
+ mon_node->name, mon_node->guid, port); \
+ }
LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt);
LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] opensm/osm_perfmgr.c: Output remote port on perfmgr error counter log messages
[not found] ` <1418063952.20566.44.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
@ 2014-12-09 12:50 ` Hal Rosenstock
0 siblings, 0 replies; 2+ messages in thread
From: Hal Rosenstock @ 2014-12-09 12:50 UTC (permalink / raw)
To: Albert Chu; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 12/8/2014 1:39 PM, Albert Chu wrote:
> Outputting the remote node and port aids in servicing the fabric more
> quickly for system administrators. In addition, it aids in fabric
> monitoring efforts that scan the log.
>
> Example output before this patch:
>
> perfmgr_log_errors: ERR 543C: VL15Dropped : 17 : node "ibcore1 L101" (NodeGUID: 0x66a02e8001313) : port 11
>
> Example output with this patch:
>
> perfmgr_log_errors: ERR 543C: VL15Dropped : 17 : node "ibcore1 L101" (NodeGUID: 0x66a02e8001313) : port 11 connected to "hype355 qib0" (NodeGUID: 0x40ed770000751100) : port 1
>
> Signed-off-by: Albert L. Chu <chu11-i2BcT+NCU+M@public.gmane.org>
Thanks. Applied.
-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-12-09 12:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-08 18:39 [PATCH] opensm/osm_perfmgr.c: Output remote port on perfmgr error counter log messages Albert Chu
[not found] ` <1418063952.20566.44.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
2014-12-09 12:50 ` Hal Rosenstock
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox