* [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
@ 2013-02-21 21:33 Ira Weiny
[not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Ira Weiny @ 2013-02-21 21:33 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
---
include/opensm/osm_perfmgr.h | 4 +
opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++---------
2 files changed, 183 insertions(+), 45 deletions(-)
diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
index 26b1ae6..3fa42d5 100644
--- a/include/opensm/osm_perfmgr.h
+++ b/include/opensm/osm_perfmgr.h
@@ -100,6 +100,9 @@ typedef struct monitored_port {
ib_net16_t lid;
ib_net16_t pkey;
ib_net32_t qp;
+ /* ClassPortInfo fields */
+ boolean_t cpi_valid;
+ ib_net16_t cap_mask;
} monitored_port_t;
/* Node to store information about nodes being monitored */
@@ -107,6 +110,7 @@ typedef struct monitored_node {
cl_map_item_t map_item;
struct monitored_node *next;
uint64_t guid;
+ uint8_t node_type;
boolean_t esp0;
char *name;
uint32_t num_ports;
diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
index 9bc1154..c71111f 100644
--- a/opensm/osm_perfmgr.c
+++ b/opensm/osm_perfmgr.c
@@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
return get_base_lid(p_node, port);
}
+
/**********************************************************************
- * Form and send the Port Counters MAD for a single port.
+ * Build a Performance Management class MAD
**********************************************************************/
-static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
- ib_net16_t dest_lid,
- ib_net32_t dest_qp, uint16_t pkey_ix,
- uint8_t port, uint8_t mad_method,
- osm_madw_context_t * p_context)
+static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
+ ib_net16_t dest_lid,
+ uint8_t sl,
+ ib_net32_t dest_qp,
+ uint16_t pkey_ix,
+ uint8_t mad_method,
+ ib_net16_t attr_id,
+ osm_madw_context_t * p_context,
+ ib_perfmgt_mad_t ** p_pm_mad)
{
- ib_api_status_t status = IB_SUCCESS;
- ib_port_counters_t *port_counter = NULL;
ib_perfmgt_mad_t *pm_mad = NULL;
osm_madw_t *p_madw = NULL;
@@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
MAD_BLOCK_SIZE, NULL);
if (p_madw == NULL)
- return IB_INSUFFICIENT_MEMORY;
+ return NULL;
pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
@@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
pm_mad->header.trans_id =
cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
(uint64_t) (0xFFFFFFFF));
- pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
+ pm_mad->header.attr_id = attr_id;
pm_mad->header.resv = 0;
pm_mad->header.attr_mod = 0;
- port_counter = (ib_port_counters_t *) & pm_mad->data;
- memset(port_counter, 0, sizeof(*port_counter));
- port_counter->port_select = port;
- port_counter->counter_select = 0xFFFF;
-
p_madw->mad_addr.dest_lid = dest_lid;
p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
p_madw->mad_addr.addr_type.gsi.remote_qkey =
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
- p_madw->mad_addr.addr_type.gsi.service_level = 0;
+ p_madw->mad_addr.addr_type.gsi.service_level = sl;
p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
p_madw->resp_expected = TRUE;
if (p_context)
p_madw->context = *p_context;
- status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
+ if (p_pm_mad)
+ *p_pm_mad = pm_mad;
+
+ OSM_LOG_EXIT(perfmgr->log);
+
+ return (p_madw);
+}
+/**********************************************************************
+ * Send a Performance Management class MAD
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
+ osm_madw_t * const p_madw)
+{
+ ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
+ TRUE);
if (status == IB_SUCCESS) {
/* pause thread if there are too many outstanding requests */
cl_atomic_inc(&(perfmgr->outstanding_queries));
@@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
}
perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
}
+ return (status);
+}
+
+
+/**********************************************************************
+ * Form and send the PortCounters MAD for a single port.
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
+ ib_net16_t dest_lid,
+ ib_net32_t dest_qp, uint16_t pkey_ix,
+ uint8_t port, uint8_t mad_method,
+ osm_madw_context_t * p_context,
+ uint8_t sl)
+{
+ ib_api_status_t status = IB_SUCCESS;
+ ib_port_counters_t *port_counter = NULL;
+ ib_perfmgt_mad_t *pm_mad = NULL;
+ osm_madw_t *p_madw = NULL;
+
+ OSM_LOG_ENTER(perfmgr->log);
+
+ p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
+ mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
+ &pm_mad);
+ if (p_madw == NULL)
+ return IB_INSUFFICIENT_MEMORY;
+
+ port_counter = (ib_port_counters_t *) & pm_mad->data;
+ memset(port_counter, 0, sizeof(*port_counter));
+ port_counter->port_select = port;
+ port_counter->counter_select = 0xFFFF;
+
+ status = perfmgr_send_mad(perfmgr, p_madw);
OSM_LOG_EXIT(perfmgr->log);
return status;
@@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
mon_node->guid = node_guid;
mon_node->name = strdup(node->print_desc);
mon_node->num_ports = num_ports;
+ mon_node->node_type = node->node_info.node_type;
/* check for enhanced switch port 0 */
mon_node->esp0 = (node->sw &&
ib_switch_info_is_enhanced_port0(&node->sw->
@@ -491,6 +537,35 @@ Exit:
}
/**********************************************************************
+ * Form and send the ClassPortInfo MAD for a single port.
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
+ ib_net16_t dest_lid,
+ ib_net32_t dest_qp,
+ uint16_t pkey_ix,
+ uint8_t port,
+ osm_madw_context_t * p_context,
+ uint8_t sl)
+{
+ ib_api_status_t status = IB_SUCCESS;
+ osm_madw_t *p_madw = NULL;
+
+ OSM_LOG_ENTER(pm->log);
+
+ p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
+ pkey_ix, IB_MAD_METHOD_GET,
+ IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
+ NULL);
+ if (p_madw == NULL)
+ return IB_INSUFFICIENT_MEMORY;
+
+ status = perfmgr_send_mad(pm, p_madw);
+
+ OSM_LOG_EXIT(pm->log);
+ return status;
+}
+
+/**********************************************************************
* query the Port Counters of all the nodes in the subnet.
**********************************************************************/
static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
@@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
mad_context.perfmgr_context.node_guid = node_guid;
mad_context.perfmgr_context.port = port;
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
+
+ if (!mon_node->port[port].cpi_valid) {
+ status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
+ mon_node->port[port].pkey_ix,
+ port, &mad_context,
+ 0); /* FIXME SL != 0 */
+ if (status != IB_SUCCESS)
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
+ "Failed to issue ClassPortInfo query "
+ "for node 0x%" PRIx64
+ " port %d (%s)\n",
+ node->node_info.node_guid, port,
+ node->print_desc);
+ if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
+ goto Exit; /* only need to issue 1 CPI query
+ for switches */
+ } else {
+
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
- gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
+ gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
#endif
- OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
- PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
- cl_ntoh16(lid), node->print_desc);
- status = perfmgr_send_pc_mad(pm, lid, remote_qp,
- mon_node->port[port].pkey_ix,
- port, IB_MAD_METHOD_GET,
- &mad_context);
- if (status != IB_SUCCESS)
- OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
- "Failed to issue port counter query for node 0x%"
- PRIx64 " port %d (%s)\n",
- node->node_info.node_guid, port,
- node->print_desc);
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
+ PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
+ cl_ntoh16(lid), node->print_desc);
+ status = perfmgr_send_pc_mad(pm, lid, remote_qp,
+ mon_node->port[port].pkey_ix,
+ port, IB_MAD_METHOD_GET,
+ &mad_context,
+ 0); /* FIXME SL != 0 */
+ if (status != IB_SUCCESS)
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
+ "Failed to issue port counter query for node 0x%"
+ PRIx64 " port %d (%s)\n",
+ node->node_info.node_guid, port,
+ node->print_desc);
+ }
}
Exit:
cl_plock_release(&pm->osm->lock);
@@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
/* clear port counters */
status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
port, IB_MAD_METHOD_SET,
- &mad_context);
+ &mad_context,
+ 0); /* FIXME SL != 0 */
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
"Failed to send clear counters MAD for %s (0x%"
@@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
monitored_node_t *p_mon_node;
int16_t pkey_ix = 0;
boolean_t valid = TRUE;
+ ib_class_port_info_t *cpi = NULL;
OSM_LOG_ENTER(pm->log);
@@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
+ /* capture CLASS_PORT_INFO data */
+ if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ cpi = (ib_class_port_info_t *) &
+ (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
+
+ cl_plock_acquire(&pm->osm->lock);
+ /* validate port number */
+ if (port >= p_mon_node->num_ports) {
+ cl_plock_release(&pm->osm->lock);
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
+ "Invalid port num %d for GUID 0x%016"
+ PRIx64 " num ports %d\n", port, node_guid,
+ p_mon_node->num_ports);
+ goto Exit;
+ }
+ if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
+ int i = 0;
+ for (i = p_mon_node->esp0 ? 0 : 1;
+ i < p_mon_node->num_ports;
+ i++) {
+ p_mon_node->port[i].cap_mask = cpi->cap_mask;
+ p_mon_node->port[i].cpi_valid = TRUE;
+ }
+ } else {
+ p_mon_node->port[port].cap_mask = cpi->cap_mask;
+ p_mon_node->port[port].cpi_valid = TRUE;
+ }
+ cl_plock_release(&pm->osm->lock);
+ }
+
/* Response could also be redirection (IBM eHCA PMA does this) */
- if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
- p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
char gid_str[INET6_ADDRSTRLEN];
- ib_class_port_info_t *cpi =
- (ib_class_port_info_t *) &
- (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
ib_api_status_t status;
+ CL_ASSERT(cpi); /* Redirect should have returned CPI
+ (processed in previous block) */
+
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
"Redirection to LID %u GID %s QP 0x%x received\n",
cl_ntoh16(cpi->redir_lid),
@@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data)
if (!valid)
goto Exit;
- /* Finally, reissue the query to the redirected location */
- status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
- pkey_ix, port,
- mad_context->perfmgr_context.
- mad_method, mad_context);
+ /* Finally, issue a CPI query to the redirected location */
+ p_mon_node->port[port].cpi_valid = FALSE;
+ status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
+ cpi->redir_qp, pkey_ix,
+ port, mad_context,
+ 0); /* FIXME SL != 0 */
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
- "Failed to send redirected MAD with method 0x%x for node 0x%"
- PRIx64 " port %d\n",
+ "Failed to send redirected MAD "
+ "with method 0x%x for node %s "
+ "(NodeGuid 0x%" PRIx64 ") port %d\n",
mad_context->perfmgr_context.mad_method,
- node_guid, port);
+ p_mon_node->name, node_guid, port);
goto Exit;
}
+ /* ClassPortInfo needed to process optional Redirection
+ * now exit normally
+ */
+ if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO)
+ goto Exit;
+
perfmgr_db_fill_err_read(wire_read, &err_reading);
/* FIXME separate query for extended counters if they are supported
* on the port.
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 8+ messages in thread[parent not found: <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org> @ 2013-02-26 15:03 ` Hal Rosenstock [not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Hal Rosenstock @ 2013-02-26 15:03 UTC (permalink / raw) To: Ira Weiny Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 2/21/2013 4:33 PM, Ira Weiny wrote: > So 2 round trips are now needed for first time ports now to determine whether or not extended counters are supported. I don't see a better way around this. > > Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> > --- > include/opensm/osm_perfmgr.h | 4 + > opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++--------- > 2 files changed, 183 insertions(+), 45 deletions(-) > > diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h > index 26b1ae6..3fa42d5 100644 > --- a/include/opensm/osm_perfmgr.h > +++ b/include/opensm/osm_perfmgr.h > @@ -100,6 +100,9 @@ typedef struct monitored_port { > ib_net16_t lid; > ib_net16_t pkey; > ib_net32_t qp; > + /* ClassPortInfo fields */ > + boolean_t cpi_valid; > + ib_net16_t cap_mask; > } monitored_port_t; > > /* Node to store information about nodes being monitored */ > @@ -107,6 +110,7 @@ typedef struct monitored_node { > cl_map_item_t map_item; > struct monitored_node *next; > uint64_t guid; > + uint8_t node_type; > boolean_t esp0; > char *name; > uint32_t num_ports; > diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c > index 9bc1154..c71111f 100644 > --- a/opensm/osm_perfmgr.c > +++ b/opensm/osm_perfmgr.c > @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, > return get_base_lid(p_node, port); > } > > + > /********************************************************************** > - * Form and send the Port Counters MAD for a single port. > + * Build a Performance Management class MAD > **********************************************************************/ > -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > - ib_net16_t dest_lid, > - ib_net32_t dest_qp, uint16_t pkey_ix, > - uint8_t port, uint8_t mad_method, > - osm_madw_context_t * p_context) > +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, > + ib_net16_t dest_lid, > + uint8_t sl, > + ib_net32_t dest_qp, > + uint16_t pkey_ix, > + uint8_t mad_method, > + ib_net16_t attr_id, > + osm_madw_context_t * p_context, > + ib_perfmgt_mad_t ** p_pm_mad) > { > - ib_api_status_t status = IB_SUCCESS; > - ib_port_counters_t *port_counter = NULL; > ib_perfmgt_mad_t *pm_mad = NULL; > osm_madw_t *p_madw = NULL; > > @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, > MAD_BLOCK_SIZE, NULL); > if (p_madw == NULL) > - return IB_INSUFFICIENT_MEMORY; > + return NULL; > > pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); > > @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > pm_mad->header.trans_id = > cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & > (uint64_t) (0xFFFFFFFF)); > - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS; > + pm_mad->header.attr_id = attr_id; > pm_mad->header.resv = 0; > pm_mad->header.attr_mod = 0; > > - port_counter = (ib_port_counters_t *) & pm_mad->data; > - memset(port_counter, 0, sizeof(*port_counter)); > - port_counter->port_select = port; > - port_counter->counter_select = 0xFFFF; > - > p_madw->mad_addr.dest_lid = dest_lid; > p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; > p_madw->mad_addr.addr_type.gsi.remote_qkey = > cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); > p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; > - p_madw->mad_addr.addr_type.gsi.service_level = 0; > + p_madw->mad_addr.addr_type.gsi.service_level = sl; > p_madw->mad_addr.addr_type.gsi.global_route = FALSE; > p_madw->resp_expected = TRUE; > > if (p_context) > p_madw->context = *p_context; > > - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE); > + if (p_pm_mad) > + *p_pm_mad = pm_mad; Nit: formatting (tabs rather than spaces) > + > + OSM_LOG_EXIT(perfmgr->log); > + > + return (p_madw); > +} > > +/********************************************************************** > + * Send a Performance Management class MAD > + **********************************************************************/ > +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, > + osm_madw_t * const p_madw) > +{ > + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, > + TRUE); > if (status == IB_SUCCESS) { > /* pause thread if there are too many outstanding requests */ > cl_atomic_inc(&(perfmgr->outstanding_queries)); > @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > } > perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; > } > + return (status); > +} > + > + > +/********************************************************************** > + * Form and send the PortCounters MAD for a single port. > + **********************************************************************/ > +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > + ib_net16_t dest_lid, > + ib_net32_t dest_qp, uint16_t pkey_ix, > + uint8_t port, uint8_t mad_method, > + osm_madw_context_t * p_context, > + uint8_t sl) > +{ > + ib_api_status_t status = IB_SUCCESS; > + ib_port_counters_t *port_counter = NULL; > + ib_perfmgt_mad_t *pm_mad = NULL; > + osm_madw_t *p_madw = NULL; > + > + OSM_LOG_ENTER(perfmgr->log); > + > + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, > + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, > + &pm_mad); > + if (p_madw == NULL) > + return IB_INSUFFICIENT_MEMORY; > + > + port_counter = (ib_port_counters_t *) & pm_mad->data; > + memset(port_counter, 0, sizeof(*port_counter)); > + port_counter->port_select = port; > + port_counter->counter_select = 0xFFFF; > + > + status = perfmgr_send_mad(perfmgr, p_madw); > > OSM_LOG_EXIT(perfmgr->log); > return status; > @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context) > mon_node->guid = node_guid; > mon_node->name = strdup(node->print_desc); > mon_node->num_ports = num_ports; > + mon_node->node_type = node->node_info.node_type; > /* check for enhanced switch port 0 */ > mon_node->esp0 = (node->sw && > ib_switch_info_is_enhanced_port0(&node->sw-> > @@ -491,6 +537,35 @@ Exit: > } > > /********************************************************************** > + * Form and send the ClassPortInfo MAD for a single port. > + **********************************************************************/ > +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, > + ib_net16_t dest_lid, > + ib_net32_t dest_qp, > + uint16_t pkey_ix, > + uint8_t port, > + osm_madw_context_t * p_context, > + uint8_t sl) > +{ > + ib_api_status_t status = IB_SUCCESS; > + osm_madw_t *p_madw = NULL; > + > + OSM_LOG_ENTER(pm->log); > + > + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp, > + pkey_ix, IB_MAD_METHOD_GET, > + IB_MAD_ATTR_CLASS_PORT_INFO, p_context, > + NULL); > + if (p_madw == NULL) > + return IB_INSUFFICIENT_MEMORY; > + > + status = perfmgr_send_mad(pm, p_madw); > + > + OSM_LOG_EXIT(pm->log); > + return status; > +} > + > +/********************************************************************** > * query the Port Counters of all the nodes in the subnet. > **********************************************************************/ > static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > mad_context.perfmgr_context.node_guid = node_guid; > mad_context.perfmgr_context.port = port; > mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; > + > + if (!mon_node->port[port].cpi_valid) { > + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, > + mon_node->port[port].pkey_ix, > + port, &mad_context, > + 0); /* FIXME SL != 0 */ > + if (status != IB_SUCCESS) > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " > + "Failed to issue ClassPortInfo query " > + "for node 0x%" PRIx64 > + " port %d (%s)\n", > + node->node_info.node_guid, port, > + node->print_desc); > + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) > + goto Exit; /* only need to issue 1 CPI query > + for switches */ Have you tried switches with base SP0 ? > + } else { > + > #ifdef ENABLE_OSM_PERF_MGR_PROFILE > - gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > #endif > - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > - cl_ntoh16(lid), node->print_desc); > - status = perfmgr_send_pc_mad(pm, lid, remote_qp, > - mon_node->port[port].pkey_ix, > - port, IB_MAD_METHOD_GET, > - &mad_context); > - if (status != IB_SUCCESS) > - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > - "Failed to issue port counter query for node 0x%" > - PRIx64 " port %d (%s)\n", > - node->node_info.node_guid, port, > - node->print_desc); > + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > + cl_ntoh16(lid), node->print_desc); > + status = perfmgr_send_pc_mad(pm, lid, remote_qp, > + mon_node->port[port].pkey_ix, > + port, IB_MAD_METHOD_GET, > + &mad_context, > + 0); /* FIXME SL != 0 */ > + if (status != IB_SUCCESS) > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > + "Failed to issue port counter query for node 0x%" > + PRIx64 " port %d (%s)\n", > + node->node_info.node_guid, port, > + node->print_desc); > + } > } > Exit: > cl_plock_release(&pm->osm->lock); > @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, > /* clear port counters */ > status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, > port, IB_MAD_METHOD_SET, > - &mad_context); > + &mad_context, > + 0); /* FIXME SL != 0 */ > if (status != IB_SUCCESS) > OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " > "Failed to send clear counters MAD for %s (0x%" > @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data) > monitored_node_t *p_mon_node; > int16_t pkey_ix = 0; > boolean_t valid = TRUE; > + ib_class_port_info_t *cpi = NULL; > > OSM_LOG_ENTER(pm->log); > > @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data) > CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || > p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); > > + /* capture CLASS_PORT_INFO data */ > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > + cpi = (ib_class_port_info_t *) & > + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > + > + cl_plock_acquire(&pm->osm->lock); > + /* validate port number */ > + if (port >= p_mon_node->num_ports) { > + cl_plock_release(&pm->osm->lock); > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " > + "Invalid port num %d for GUID 0x%016" > + PRIx64 " num ports %d\n", port, node_guid, > + p_mon_node->num_ports); > + goto Exit; > + } > + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { > + int i = 0; > + for (i = p_mon_node->esp0 ? 0 : 1; > + i < p_mon_node->num_ports; > + i++) { > + p_mon_node->port[i].cap_mask = cpi->cap_mask; > + p_mon_node->port[i].cpi_valid = TRUE; > + } > + } else { > + p_mon_node->port[port].cap_mask = cpi->cap_mask; > + p_mon_node->port[port].cpi_valid = TRUE; > + } > + cl_plock_release(&pm->osm->lock); > + } > + > /* Response could also be redirection (IBM eHCA PMA does this) */ > - if (p_mad->status & IB_MAD_STATUS_REDIRECT && > - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > + if (p_mad->status & IB_MAD_STATUS_REDIRECT) { Shouldn't this be part of if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) clause ? -- Hal > char gid_str[INET6_ADDRSTRLEN]; > - ib_class_port_info_t *cpi = > - (ib_class_port_info_t *) & > - (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > ib_api_status_t status; > > + CL_ASSERT(cpi); /* Redirect should have returned CPI > + (processed in previous block) */ > + > OSM_LOG(pm->log, OSM_LOG_VERBOSE, > "Redirection to LID %u GID %s QP 0x%x received\n", > cl_ntoh16(cpi->redir_lid), > @@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data) > if (!valid) > goto Exit; > > - /* Finally, reissue the query to the redirected location */ > - status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp, > - pkey_ix, port, > - mad_context->perfmgr_context. > - mad_method, mad_context); > + /* Finally, issue a CPI query to the redirected location */ > + p_mon_node->port[port].cpi_valid = FALSE; > + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, > + cpi->redir_qp, pkey_ix, > + port, mad_context, > + 0); /* FIXME SL != 0 */ > if (status != IB_SUCCESS) > OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " > - "Failed to send redirected MAD with method 0x%x for node 0x%" > - PRIx64 " port %d\n", > + "Failed to send redirected MAD " > + "with method 0x%x for node %s " > + "(NodeGuid 0x%" PRIx64 ") port %d\n", > mad_context->perfmgr_context.mad_method, > - node_guid, port); > + p_mon_node->name, node_guid, port); > goto Exit; > } > > + /* ClassPortInfo needed to process optional Redirection > + * now exit normally > + */ > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) > + goto Exit; > + > perfmgr_db_fill_err_read(wire_read, &err_reading); > /* FIXME separate query for extended counters if they are supported > * on the port. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> @ 2013-02-26 18:58 ` Ira Weiny [not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Ira Weiny @ 2013-02-26 18:58 UTC (permalink / raw) To: Hal Rosenstock Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On Tue, 26 Feb 2013 10:03:35 -0500 Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > On 2/21/2013 4:33 PM, Ira Weiny wrote: > > > > So 2 round trips are now needed for first time ports now to determine > whether or not extended counters are supported. I don't see a better way > around this. Nope, this is the best we can do with the current spec. > > > > > Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> > > --- > > include/opensm/osm_perfmgr.h | 4 + > > opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++--------- > > 2 files changed, 183 insertions(+), 45 deletions(-) > > > > diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h > > index 26b1ae6..3fa42d5 100644 > > --- a/include/opensm/osm_perfmgr.h > > +++ b/include/opensm/osm_perfmgr.h > > @@ -100,6 +100,9 @@ typedef struct monitored_port { > > ib_net16_t lid; > > ib_net16_t pkey; > > ib_net32_t qp; > > + /* ClassPortInfo fields */ > > + boolean_t cpi_valid; > > + ib_net16_t cap_mask; > > } monitored_port_t; > > > > /* Node to store information about nodes being monitored */ > > @@ -107,6 +110,7 @@ typedef struct monitored_node { > > cl_map_item_t map_item; > > struct monitored_node *next; > > uint64_t guid; > > + uint8_t node_type; > > boolean_t esp0; > > char *name; > > uint32_t num_ports; > > diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c > > index 9bc1154..c71111f 100644 > > --- a/opensm/osm_perfmgr.c > > +++ b/opensm/osm_perfmgr.c > > @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, > > return get_base_lid(p_node, port); > > } > > > > + > > /********************************************************************** > > - * Form and send the Port Counters MAD for a single port. > > + * Build a Performance Management class MAD > > **********************************************************************/ > > -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > > - ib_net16_t dest_lid, > > - ib_net32_t dest_qp, uint16_t pkey_ix, > > - uint8_t port, uint8_t mad_method, > > - osm_madw_context_t * p_context) > > +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, > > + ib_net16_t dest_lid, > > + uint8_t sl, > > + ib_net32_t dest_qp, > > + uint16_t pkey_ix, > > + uint8_t mad_method, > > + ib_net16_t attr_id, > > + osm_madw_context_t * p_context, > > + ib_perfmgt_mad_t ** p_pm_mad) > > { > > - ib_api_status_t status = IB_SUCCESS; > > - ib_port_counters_t *port_counter = NULL; > > ib_perfmgt_mad_t *pm_mad = NULL; > > osm_madw_t *p_madw = NULL; > > > > @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > > p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, > > MAD_BLOCK_SIZE, NULL); > > if (p_madw == NULL) > > - return IB_INSUFFICIENT_MEMORY; > > + return NULL; > > > > pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); > > > > @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > > pm_mad->header.trans_id = > > cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & > > (uint64_t) (0xFFFFFFFF)); > > - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS; > > + pm_mad->header.attr_id = attr_id; > > pm_mad->header.resv = 0; > > pm_mad->header.attr_mod = 0; > > > > - port_counter = (ib_port_counters_t *) & pm_mad->data; > > - memset(port_counter, 0, sizeof(*port_counter)); > > - port_counter->port_select = port; > > - port_counter->counter_select = 0xFFFF; > > - > > p_madw->mad_addr.dest_lid = dest_lid; > > p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; > > p_madw->mad_addr.addr_type.gsi.remote_qkey = > > cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); > > p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; > > - p_madw->mad_addr.addr_type.gsi.service_level = 0; > > + p_madw->mad_addr.addr_type.gsi.service_level = sl; > > p_madw->mad_addr.addr_type.gsi.global_route = FALSE; > > p_madw->resp_expected = TRUE; > > > > if (p_context) > > p_madw->context = *p_context; > > > > - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE); > > + if (p_pm_mad) > > + *p_pm_mad = pm_mad; > > Nit: formatting (tabs rather than spaces) Got it in V2 > > > + > > + OSM_LOG_EXIT(perfmgr->log); > > + > > + return (p_madw); > > +} > > > > +/********************************************************************** > > + * Send a Performance Management class MAD > > + **********************************************************************/ > > +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, > > + osm_madw_t * const p_madw) > > +{ > > + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, > > + TRUE); > > if (status == IB_SUCCESS) { > > /* pause thread if there are too many outstanding requests */ > > cl_atomic_inc(&(perfmgr->outstanding_queries)); > > @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > > } > > perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; > > } > > + return (status); > > +} > > + > > + > > +/********************************************************************** > > + * Form and send the PortCounters MAD for a single port. > > + **********************************************************************/ > > +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, > > + ib_net16_t dest_lid, > > + ib_net32_t dest_qp, uint16_t pkey_ix, > > + uint8_t port, uint8_t mad_method, > > + osm_madw_context_t * p_context, > > + uint8_t sl) > > +{ > > + ib_api_status_t status = IB_SUCCESS; > > + ib_port_counters_t *port_counter = NULL; > > + ib_perfmgt_mad_t *pm_mad = NULL; > > + osm_madw_t *p_madw = NULL; > > + > > + OSM_LOG_ENTER(perfmgr->log); > > + > > + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, > > + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, > > + &pm_mad); > > + if (p_madw == NULL) > > + return IB_INSUFFICIENT_MEMORY; > > + > > + port_counter = (ib_port_counters_t *) & pm_mad->data; > > + memset(port_counter, 0, sizeof(*port_counter)); > > + port_counter->port_select = port; > > + port_counter->counter_select = 0xFFFF; > > + > > + status = perfmgr_send_mad(perfmgr, p_madw); > > > > OSM_LOG_EXIT(perfmgr->log); > > return status; > > @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context) > > mon_node->guid = node_guid; > > mon_node->name = strdup(node->print_desc); > > mon_node->num_ports = num_ports; > > + mon_node->node_type = node->node_info.node_type; > > /* check for enhanced switch port 0 */ > > mon_node->esp0 = (node->sw && > > ib_switch_info_is_enhanced_port0(&node->sw-> > > @@ -491,6 +537,35 @@ Exit: > > } > > > > /********************************************************************** > > + * Form and send the ClassPortInfo MAD for a single port. > > + **********************************************************************/ > > +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, > > + ib_net16_t dest_lid, > > + ib_net32_t dest_qp, > > + uint16_t pkey_ix, > > + uint8_t port, > > + osm_madw_context_t * p_context, > > + uint8_t sl) > > +{ > > + ib_api_status_t status = IB_SUCCESS; > > + osm_madw_t *p_madw = NULL; > > + > > + OSM_LOG_ENTER(pm->log); > > + > > + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp, > > + pkey_ix, IB_MAD_METHOD_GET, > > + IB_MAD_ATTR_CLASS_PORT_INFO, p_context, > > + NULL); > > + if (p_madw == NULL) > > + return IB_INSUFFICIENT_MEMORY; > > + > > + status = perfmgr_send_mad(pm, p_madw); > > + > > + OSM_LOG_EXIT(pm->log); > > + return status; > > +} > > + > > +/********************************************************************** > > * query the Port Counters of all the nodes in the subnet. > > **********************************************************************/ > > static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > > @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > > mad_context.perfmgr_context.node_guid = node_guid; > > mad_context.perfmgr_context.port = port; > > mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; > > + > > + if (!mon_node->port[port].cpi_valid) { > > + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, > > + mon_node->port[port].pkey_ix, > > + port, &mad_context, > > + 0); /* FIXME SL != 0 */ > > + if (status != IB_SUCCESS) > > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " > > + "Failed to issue ClassPortInfo query " > > + "for node 0x%" PRIx64 > > + " port %d (%s)\n", > > + node->node_info.node_guid, port, > > + node->print_desc); > > + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) > > + goto Exit; /* only need to issue 1 CPI query > > + for switches */ > > Have you tried switches with base SP0 ? Yes. Why? I admit I may be confused about when SP0 is different from physical ports. > > > + } else { > > + > > #ifdef ENABLE_OSM_PERF_MGR_PROFILE > > - gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > > + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > > #endif > > - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > > - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > > - cl_ntoh16(lid), node->print_desc); > > - status = perfmgr_send_pc_mad(pm, lid, remote_qp, > > - mon_node->port[port].pkey_ix, > > - port, IB_MAD_METHOD_GET, > > - &mad_context); > > - if (status != IB_SUCCESS) > > - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > > - "Failed to issue port counter query for node 0x%" > > - PRIx64 " port %d (%s)\n", > > - node->node_info.node_guid, port, > > - node->print_desc); > > + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > > + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > > + cl_ntoh16(lid), node->print_desc); > > + status = perfmgr_send_pc_mad(pm, lid, remote_qp, > > + mon_node->port[port].pkey_ix, > > + port, IB_MAD_METHOD_GET, > > + &mad_context, > > + 0); /* FIXME SL != 0 */ > > + if (status != IB_SUCCESS) > > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > > + "Failed to issue port counter query for node 0x%" > > + PRIx64 " port %d (%s)\n", > > + node->node_info.node_guid, port, > > + node->print_desc); > > + } > > } > > Exit: > > cl_plock_release(&pm->osm->lock); > > @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, > > /* clear port counters */ > > status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, > > port, IB_MAD_METHOD_SET, > > - &mad_context); > > + &mad_context, > > + 0); /* FIXME SL != 0 */ > > if (status != IB_SUCCESS) > > OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " > > "Failed to send clear counters MAD for %s (0x%" > > @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data) > > monitored_node_t *p_mon_node; > > int16_t pkey_ix = 0; > > boolean_t valid = TRUE; > > + ib_class_port_info_t *cpi = NULL; > > > > OSM_LOG_ENTER(pm->log); > > > > @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data) > > CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || > > p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); > > > > + /* capture CLASS_PORT_INFO data */ > > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > > + cpi = (ib_class_port_info_t *) & > > + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > > + > > + cl_plock_acquire(&pm->osm->lock); > > + /* validate port number */ > > + if (port >= p_mon_node->num_ports) { > > + cl_plock_release(&pm->osm->lock); > > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " > > + "Invalid port num %d for GUID 0x%016" > > + PRIx64 " num ports %d\n", port, node_guid, > > + p_mon_node->num_ports); > > + goto Exit; > > + } > > + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { > > + int i = 0; > > + for (i = p_mon_node->esp0 ? 0 : 1; > > + i < p_mon_node->num_ports; > > + i++) { > > + p_mon_node->port[i].cap_mask = cpi->cap_mask; > > + p_mon_node->port[i].cpi_valid = TRUE; > > + } > > + } else { > > + p_mon_node->port[port].cap_mask = cpi->cap_mask; > > + p_mon_node->port[port].cpi_valid = TRUE; > > + } > > + cl_plock_release(&pm->osm->lock); > > + } > > + > > /* Response could also be redirection (IBM eHCA PMA does this) */ > > - if (p_mad->status & IB_MAD_STATUS_REDIRECT && > > - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > > + if (p_mad->status & IB_MAD_STATUS_REDIRECT) { > > Shouldn't this be part of if (p_mad->attr_id == > IB_MAD_ATTR_CLASS_PORT_INFO) clause ? > Yes but I took care of that in the next patch where I cleaned up the code and made that entire block a function "handle_redirect". Ira > -- Hal > > > char gid_str[INET6_ADDRSTRLEN]; > > - ib_class_port_info_t *cpi = > > - (ib_class_port_info_t *) & > > - (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > > ib_api_status_t status; > > > > + CL_ASSERT(cpi); /* Redirect should have returned CPI > > + (processed in previous block) */ > > + > > OSM_LOG(pm->log, OSM_LOG_VERBOSE, > > "Redirection to LID %u GID %s QP 0x%x received\n", > > cl_ntoh16(cpi->redir_lid), > > @@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data) > > if (!valid) > > goto Exit; > > > > - /* Finally, reissue the query to the redirected location */ > > - status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp, > > - pkey_ix, port, > > - mad_context->perfmgr_context. > > - mad_method, mad_context); > > + /* Finally, issue a CPI query to the redirected location */ > > + p_mon_node->port[port].cpi_valid = FALSE; > > + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, > > + cpi->redir_qp, pkey_ix, > > + port, mad_context, > > + 0); /* FIXME SL != 0 */ > > if (status != IB_SUCCESS) > > OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " > > - "Failed to send redirected MAD with method 0x%x for node 0x%" > > - PRIx64 " port %d\n", > > + "Failed to send redirected MAD " > > + "with method 0x%x for node %s " > > + "(NodeGuid 0x%" PRIx64 ") port %d\n", > > mad_context->perfmgr_context.mad_method, > > - node_guid, port); > > + p_mon_node->name, node_guid, port); > > goto Exit; > > } > > > > + /* ClassPortInfo needed to process optional Redirection > > + * now exit normally > > + */ > > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) > > + goto Exit; > > + > > perfmgr_db_fill_err_read(wire_read, &err_reading); > > /* FIXME separate query for extended counters if they are supported > > * on the port. > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Ira Weiny Member of Technical Staff Lawrence Livermore National Lab 925-423-8008 weiny2-i2BcT+NCU+M@public.gmane.org -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org> @ 2013-02-26 19:48 ` Hal Rosenstock [not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Hal Rosenstock @ 2013-02-26 19:48 UTC (permalink / raw) To: Ira Weiny Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 2/26/2013 1:58 PM, Ira Weiny wrote: > On Tue, 26 Feb 2013 10:03:35 -0500 > Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > >> On 2/21/2013 4:33 PM, Ira Weiny wrote: >>> >> >> So 2 round trips are now needed for first time ports now to determine >> whether or not extended counters are supported. I don't see a better way >> around this. > > Nope, this is the best we can do with the current spec. > >> >>> >>> Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org> >>> --- >>> include/opensm/osm_perfmgr.h | 4 + >>> opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++--------- >>> 2 files changed, 183 insertions(+), 45 deletions(-) >>> >>> diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h >>> index 26b1ae6..3fa42d5 100644 >>> --- a/include/opensm/osm_perfmgr.h >>> +++ b/include/opensm/osm_perfmgr.h >>> @@ -100,6 +100,9 @@ typedef struct monitored_port { >>> ib_net16_t lid; >>> ib_net16_t pkey; >>> ib_net32_t qp; >>> + /* ClassPortInfo fields */ >>> + boolean_t cpi_valid; >>> + ib_net16_t cap_mask; >>> } monitored_port_t; >>> >>> /* Node to store information about nodes being monitored */ >>> @@ -107,6 +110,7 @@ typedef struct monitored_node { >>> cl_map_item_t map_item; >>> struct monitored_node *next; >>> uint64_t guid; >>> + uint8_t node_type; >>> boolean_t esp0; >>> char *name; >>> uint32_t num_ports; >>> diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c >>> index 9bc1154..c71111f 100644 >>> --- a/opensm/osm_perfmgr.c >>> +++ b/opensm/osm_perfmgr.c >>> @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, >>> return get_base_lid(p_node, port); >>> } >>> >>> + >>> /********************************************************************** >>> - * Form and send the Port Counters MAD for a single port. >>> + * Build a Performance Management class MAD >>> **********************************************************************/ >>> -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, >>> - ib_net16_t dest_lid, >>> - ib_net32_t dest_qp, uint16_t pkey_ix, >>> - uint8_t port, uint8_t mad_method, >>> - osm_madw_context_t * p_context) >>> +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, >>> + ib_net16_t dest_lid, >>> + uint8_t sl, >>> + ib_net32_t dest_qp, >>> + uint16_t pkey_ix, >>> + uint8_t mad_method, >>> + ib_net16_t attr_id, >>> + osm_madw_context_t * p_context, >>> + ib_perfmgt_mad_t ** p_pm_mad) >>> { >>> - ib_api_status_t status = IB_SUCCESS; >>> - ib_port_counters_t *port_counter = NULL; >>> ib_perfmgt_mad_t *pm_mad = NULL; >>> osm_madw_t *p_madw = NULL; >>> >>> @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, >>> p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, >>> MAD_BLOCK_SIZE, NULL); >>> if (p_madw == NULL) >>> - return IB_INSUFFICIENT_MEMORY; >>> + return NULL; >>> >>> pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); >>> >>> @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, >>> pm_mad->header.trans_id = >>> cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & >>> (uint64_t) (0xFFFFFFFF)); >>> - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS; >>> + pm_mad->header.attr_id = attr_id; >>> pm_mad->header.resv = 0; >>> pm_mad->header.attr_mod = 0; >>> >>> - port_counter = (ib_port_counters_t *) & pm_mad->data; >>> - memset(port_counter, 0, sizeof(*port_counter)); >>> - port_counter->port_select = port; >>> - port_counter->counter_select = 0xFFFF; >>> - >>> p_madw->mad_addr.dest_lid = dest_lid; >>> p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; >>> p_madw->mad_addr.addr_type.gsi.remote_qkey = >>> cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); >>> p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; >>> - p_madw->mad_addr.addr_type.gsi.service_level = 0; >>> + p_madw->mad_addr.addr_type.gsi.service_level = sl; >>> p_madw->mad_addr.addr_type.gsi.global_route = FALSE; >>> p_madw->resp_expected = TRUE; >>> >>> if (p_context) >>> p_madw->context = *p_context; >>> >>> - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE); >>> + if (p_pm_mad) >>> + *p_pm_mad = pm_mad; >> >> Nit: formatting (tabs rather than spaces) > > Got it in V2 Thanks. > >> >>> + >>> + OSM_LOG_EXIT(perfmgr->log); >>> + >>> + return (p_madw); >>> +} >>> >>> +/********************************************************************** >>> + * Send a Performance Management class MAD >>> + **********************************************************************/ >>> +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, >>> + osm_madw_t * const p_madw) >>> +{ >>> + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, >>> + TRUE); >>> if (status == IB_SUCCESS) { >>> /* pause thread if there are too many outstanding requests */ >>> cl_atomic_inc(&(perfmgr->outstanding_queries)); >>> @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, >>> } >>> perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; >>> } >>> + return (status); >>> +} >>> + >>> + >>> +/********************************************************************** >>> + * Form and send the PortCounters MAD for a single port. >>> + **********************************************************************/ >>> +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, >>> + ib_net16_t dest_lid, >>> + ib_net32_t dest_qp, uint16_t pkey_ix, >>> + uint8_t port, uint8_t mad_method, >>> + osm_madw_context_t * p_context, >>> + uint8_t sl) >>> +{ >>> + ib_api_status_t status = IB_SUCCESS; >>> + ib_port_counters_t *port_counter = NULL; >>> + ib_perfmgt_mad_t *pm_mad = NULL; >>> + osm_madw_t *p_madw = NULL; >>> + >>> + OSM_LOG_ENTER(perfmgr->log); >>> + >>> + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, >>> + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, >>> + &pm_mad); >>> + if (p_madw == NULL) >>> + return IB_INSUFFICIENT_MEMORY; >>> + >>> + port_counter = (ib_port_counters_t *) & pm_mad->data; >>> + memset(port_counter, 0, sizeof(*port_counter)); >>> + port_counter->port_select = port; >>> + port_counter->counter_select = 0xFFFF; >>> + >>> + status = perfmgr_send_mad(perfmgr, p_madw); >>> >>> OSM_LOG_EXIT(perfmgr->log); >>> return status; >>> @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context) >>> mon_node->guid = node_guid; >>> mon_node->name = strdup(node->print_desc); >>> mon_node->num_ports = num_ports; >>> + mon_node->node_type = node->node_info.node_type; >>> /* check for enhanced switch port 0 */ >>> mon_node->esp0 = (node->sw && >>> ib_switch_info_is_enhanced_port0(&node->sw-> >>> @@ -491,6 +537,35 @@ Exit: >>> } >>> >>> /********************************************************************** >>> + * Form and send the ClassPortInfo MAD for a single port. >>> + **********************************************************************/ >>> +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, >>> + ib_net16_t dest_lid, >>> + ib_net32_t dest_qp, >>> + uint16_t pkey_ix, >>> + uint8_t port, >>> + osm_madw_context_t * p_context, >>> + uint8_t sl) >>> +{ >>> + ib_api_status_t status = IB_SUCCESS; >>> + osm_madw_t *p_madw = NULL; >>> + >>> + OSM_LOG_ENTER(pm->log); >>> + >>> + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp, >>> + pkey_ix, IB_MAD_METHOD_GET, >>> + IB_MAD_ATTR_CLASS_PORT_INFO, p_context, >>> + NULL); >>> + if (p_madw == NULL) >>> + return IB_INSUFFICIENT_MEMORY; >>> + >>> + status = perfmgr_send_mad(pm, p_madw); >>> + >>> + OSM_LOG_EXIT(pm->log); >>> + return status; >>> +} >>> + >>> +/********************************************************************** >>> * query the Port Counters of all the nodes in the subnet. >>> **********************************************************************/ >>> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) >>> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) >>> mad_context.perfmgr_context.node_guid = node_guid; >>> mad_context.perfmgr_context.port = port; >>> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; >>> + >>> + if (!mon_node->port[port].cpi_valid) { >>> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, >>> + mon_node->port[port].pkey_ix, >>> + port, &mad_context, >>> + 0); /* FIXME SL != 0 */ >>> + if (status != IB_SUCCESS) >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " >>> + "Failed to issue ClassPortInfo query " >>> + "for node 0x%" PRIx64 >>> + " port %d (%s)\n", >>> + node->node_info.node_guid, port, >>> + node->print_desc); >>> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) >>> + goto Exit; /* only need to issue 1 CPI query >>> + for switches */ >> >> Have you tried switches with base SP0 ? > > Yes. Why? I admit I may be confused about when SP0 is different from physical ports. The reason I asked here is that the ClassPortInfo when BSP0 is being requested on port 1 not 0. I'm not sure what you're referring to in terms of SP0 being different from physical ports so I'll elaborate on what I think you may be asking: In general, BSP0 is different than ESP0 in terms of PMA attributes in that it is excluded in PortSelect. PortSelect says "However, 0 is only valid for the enhanced switch management port; it is ignored for the base switch management port." SP0 is different from physical ports in that it's a virtual IB port (usually across CPU link or the like) and doesn't have a remote peer. >> >>> + } else { >>> + >>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE >>> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL); >>> + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); >>> #endif >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" >>> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, >>> - cl_ntoh16(lid), node->print_desc); >>> - status = perfmgr_send_pc_mad(pm, lid, remote_qp, >>> - mon_node->port[port].pkey_ix, >>> - port, IB_MAD_METHOD_GET, >>> - &mad_context); >>> - if (status != IB_SUCCESS) >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " >>> - "Failed to issue port counter query for node 0x%" >>> - PRIx64 " port %d (%s)\n", >>> - node->node_info.node_guid, port, >>> - node->print_desc); >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" >>> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, >>> + cl_ntoh16(lid), node->print_desc); >>> + status = perfmgr_send_pc_mad(pm, lid, remote_qp, >>> + mon_node->port[port].pkey_ix, >>> + port, IB_MAD_METHOD_GET, >>> + &mad_context, >>> + 0); /* FIXME SL != 0 */ >>> + if (status != IB_SUCCESS) >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " >>> + "Failed to issue port counter query for node 0x%" >>> + PRIx64 " port %d (%s)\n", >>> + node->node_info.node_guid, port, >>> + node->print_desc); >>> + } >>> } >>> Exit: >>> cl_plock_release(&pm->osm->lock); >>> @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, >>> /* clear port counters */ >>> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, >>> port, IB_MAD_METHOD_SET, >>> - &mad_context); >>> + &mad_context, >>> + 0); /* FIXME SL != 0 */ >>> if (status != IB_SUCCESS) >>> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " >>> "Failed to send clear counters MAD for %s (0x%" >>> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data) >>> monitored_node_t *p_mon_node; >>> int16_t pkey_ix = 0; >>> boolean_t valid = TRUE; >>> + ib_class_port_info_t *cpi = NULL; >>> >>> OSM_LOG_ENTER(pm->log); >>> >>> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data) >>> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || >>> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); >>> >>> + /* capture CLASS_PORT_INFO data */ >>> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { >>> + cpi = (ib_class_port_info_t *) & >>> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); >>> + >>> + cl_plock_acquire(&pm->osm->lock); >>> + /* validate port number */ >>> + if (port >= p_mon_node->num_ports) { >>> + cl_plock_release(&pm->osm->lock); >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " >>> + "Invalid port num %d for GUID 0x%016" >>> + PRIx64 " num ports %d\n", port, node_guid, >>> + p_mon_node->num_ports); >>> + goto Exit; >>> + } >>> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { >>> + int i = 0; >>> + for (i = p_mon_node->esp0 ? 0 : 1; >>> + i < p_mon_node->num_ports; >>> + i++) { >>> + p_mon_node->port[i].cap_mask = cpi->cap_mask; >>> + p_mon_node->port[i].cpi_valid = TRUE; >>> + } >>> + } else { >>> + p_mon_node->port[port].cap_mask = cpi->cap_mask; >>> + p_mon_node->port[port].cpi_valid = TRUE; >>> + } >>> + cl_plock_release(&pm->osm->lock); >>> + } >>> + >>> /* Response could also be redirection (IBM eHCA PMA does this) */ >>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT && >>> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { >>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) { >> >> Shouldn't this be part of if (p_mad->attr_id == >> IB_MAD_ATTR_CLASS_PORT_INFO) clause ? >> > > Yes but I took care of that in the next patch where I cleaned up the code and made that entire block a function "handle_redirect". I see that now. The semantics here shouldn't have been changed but it's probably OK that the next patch fixes that again. -- Hal > Ira <snip...> -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> @ 2013-02-26 21:07 ` Ira Weiny [not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Ira Weiny @ 2013-02-26 21:07 UTC (permalink / raw) To: Hal Rosenstock Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On Tue, 26 Feb 2013 14:48:26 -0500 Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > On 2/26/2013 1:58 PM, Ira Weiny wrote: > > On Tue, 26 Feb 2013 10:03:35 -0500 > > Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > > > >> On 2/21/2013 4:33 PM, Ira Weiny wrote: > >>> [snip] > >>> +/********************************************************************** > >>> * query the Port Counters of all the nodes in the subnet. > >>> **********************************************************************/ > >>> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > >>> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) > >>> mad_context.perfmgr_context.node_guid = node_guid; > >>> mad_context.perfmgr_context.port = port; > >>> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; > >>> + > >>> + if (!mon_node->port[port].cpi_valid) { > >>> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, > >>> + mon_node->port[port].pkey_ix, > >>> + port, &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> + if (status != IB_SUCCESS) > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " > >>> + "Failed to issue ClassPortInfo query " > >>> + "for node 0x%" PRIx64 > >>> + " port %d (%s)\n", > >>> + node->node_info.node_guid, port, > >>> + node->print_desc); > >>> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) > >>> + goto Exit; /* only need to issue 1 CPI query > >>> + for switches */ > >> > >> Have you tried switches with base SP0 ? > > > > Yes. Why? I admit I may be confused about when SP0 is different from physical ports. > > The reason I asked here is that the ClassPortInfo when BSP0 is being > requested on port 1 not 0. > > I'm not sure what you're referring to in terms of SP0 being different > from physical ports so I'll elaborate on what I think you may be asking: > > In general, BSP0 is different than ESP0 in terms of PMA attributes in > that it is excluded in PortSelect. PortSelect says "However, 0 is > only valid for the enhanced switch management port; it is > ignored for the base switch management port." Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0? To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly. Ira > > SP0 is different from physical ports in that it's a virtual IB port > (usually across CPU link or the like) and doesn't have a remote peer. > > >> > >>> + } else { > >>> + > >>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE > >>> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > >>> + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > >>> #endif > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > >>> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > >>> - cl_ntoh16(lid), node->print_desc); > >>> - status = perfmgr_send_pc_mad(pm, lid, remote_qp, > >>> - mon_node->port[port].pkey_ix, > >>> - port, IB_MAD_METHOD_GET, > >>> - &mad_context); > >>> - if (status != IB_SUCCESS) > >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > >>> - "Failed to issue port counter query for node 0x%" > >>> - PRIx64 " port %d (%s)\n", > >>> - node->node_info.node_guid, port, > >>> - node->print_desc); > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > >>> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > >>> + cl_ntoh16(lid), node->print_desc); > >>> + status = perfmgr_send_pc_mad(pm, lid, remote_qp, > >>> + mon_node->port[port].pkey_ix, > >>> + port, IB_MAD_METHOD_GET, > >>> + &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> + if (status != IB_SUCCESS) > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > >>> + "Failed to issue port counter query for node 0x%" > >>> + PRIx64 " port %d (%s)\n", > >>> + node->node_info.node_guid, port, > >>> + node->print_desc); > >>> + } > >>> } > >>> Exit: > >>> cl_plock_release(&pm->osm->lock); > >>> @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, > >>> /* clear port counters */ > >>> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, > >>> port, IB_MAD_METHOD_SET, > >>> - &mad_context); > >>> + &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> if (status != IB_SUCCESS) > >>> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " > >>> "Failed to send clear counters MAD for %s (0x%" > >>> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data) > >>> monitored_node_t *p_mon_node; > >>> int16_t pkey_ix = 0; > >>> boolean_t valid = TRUE; > >>> + ib_class_port_info_t *cpi = NULL; > >>> > >>> OSM_LOG_ENTER(pm->log); > >>> > >>> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data) > >>> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || > >>> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); > >>> > >>> + /* capture CLASS_PORT_INFO data */ > >>> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > >>> + cpi = (ib_class_port_info_t *) & > >>> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > >>> + > >>> + cl_plock_acquire(&pm->osm->lock); > >>> + /* validate port number */ > >>> + if (port >= p_mon_node->num_ports) { > >>> + cl_plock_release(&pm->osm->lock); > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " > >>> + "Invalid port num %d for GUID 0x%016" > >>> + PRIx64 " num ports %d\n", port, node_guid, > >>> + p_mon_node->num_ports); > >>> + goto Exit; > >>> + } > >>> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { > >>> + int i = 0; > >>> + for (i = p_mon_node->esp0 ? 0 : 1; > >>> + i < p_mon_node->num_ports; > >>> + i++) { > >>> + p_mon_node->port[i].cap_mask = cpi->cap_mask; > >>> + p_mon_node->port[i].cpi_valid = TRUE; > >>> + } > >>> + } else { > >>> + p_mon_node->port[port].cap_mask = cpi->cap_mask; > >>> + p_mon_node->port[port].cpi_valid = TRUE; > >>> + } > >>> + cl_plock_release(&pm->osm->lock); > >>> + } > >>> + > >>> /* Response could also be redirection (IBM eHCA PMA does this) */ > >>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT && > >>> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > >>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) { > >> > >> Shouldn't this be part of if (p_mad->attr_id == > >> IB_MAD_ATTR_CLASS_PORT_INFO) clause ? > >> > > > > Yes but I took care of that in the next patch where I cleaned up the code and made that entire block a function "handle_redirect". > > I see that now. The semantics here shouldn't have been changed but it's > probably OK that the next patch fixes that again. > > -- Hal > > > Ira > > <snip...> > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Ira Weiny Member of Technical Staff Lawrence Livermore National Lab 925-423-8008 weiny2-i2BcT+NCU+M@public.gmane.org -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org> @ 2013-02-26 21:14 ` Hal Rosenstock [not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Hal Rosenstock @ 2013-02-26 21:14 UTC (permalink / raw) To: Ira Weiny Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 2/26/2013 4:07 PM, Ira Weiny wrote: > Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0? I thought I answered that in the previous response: "The reason I asked here is that the ClassPortInfo is being requested on port 1 not 0 for BSP0." -- Hal > To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly. > > Ira -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> @ 2013-02-26 21:24 ` Ira Weiny [not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Ira Weiny @ 2013-02-26 21:24 UTC (permalink / raw) To: Hal Rosenstock Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On Tue, 26 Feb 2013 16:14:52 -0500 Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > On 2/26/2013 4:07 PM, Ira Weiny wrote: > > Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0? > > I thought I answered that in the previous response: > "The reason I asked here is that the ClassPortInfo is being requested on > port 1 not 0 for BSP0." But that is the correct thing to do, right? Ira > > -- Hal > > > To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly. > > > > Ira > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Ira Weiny Member of Technical Staff Lawrence Livermore National Lab 925-423-8008 weiny2-i2BcT+NCU+M@public.gmane.org -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org>]
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port. [not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org> @ 2013-02-26 22:43 ` Hal Rosenstock 0 siblings, 0 replies; 8+ messages in thread From: Hal Rosenstock @ 2013-02-26 22:43 UTC (permalink / raw) To: Ira Weiny Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org On 2/26/2013 4:24 PM, Ira Weiny wrote: > On Tue, 26 Feb 2013 16:14:52 -0500 > Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote: > >> On 2/26/2013 4:07 PM, Ira Weiny wrote: >>> Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0? >> >> I thought I answered that in the previous response: >> "The reason I asked here is that the ClassPortInfo is being requested on >> port 1 not 0 for BSP0." > > But that is the correct thing to do, right? I think that's legal per: C13-29: The ClassPortInfo attribute shall be implemented for every GS class agent supported by a node; it shall be implemented on every port through which the GS class agent may be accessed. -- Hal > > Ira > >> >> -- Hal >> >>> To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly. >>> >>> Ira >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in >> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-02-26 22:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-02-21 21:33 [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port Ira Weiny
[not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>
2013-02-26 15:03 ` Hal Rosenstock
[not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 18:58 ` Ira Weiny
[not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org>
2013-02-26 19:48 ` Hal Rosenstock
[not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 21:07 ` Ira Weiny
[not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org>
2013-02-26 21:14 ` Hal Rosenstock
[not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 21:24 ` Ira Weiny
[not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org>
2013-02-26 22:43 ` Hal Rosenstock
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox