* [PATCH] opensm: Manage ports that do not support congestion control
@ 2012-08-29 17:40 Albert Chu
[not found] ` <1346262032.15926.22.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
0 siblings, 1 reply; 2+ messages in thread
From: Albert Chu @ 2012-08-29 17:40 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Support the ability to run congestion control in an environment
where some IB hardware does not support congestion control.
In addition, by flagging ports that do not support congestion
control, we can reduce the amount of MAD traffic and log messages.
Signed-off-by: Albert Chu <chu11-i2BcT+NCU+M@public.gmane.org>
---
include/opensm/osm_congestion_control.h | 2 +
include/opensm/osm_port.h | 8 ++++
opensm/osm_congestion_control.c | 68 ++++++++++++++++++++++++++-----
3 files changed, 68 insertions(+), 10 deletions(-)
diff --git a/include/opensm/osm_congestion_control.h b/include/opensm/osm_congestion_control.h
index 94e4ffb..78e4a64 100644
--- a/include/opensm/osm_congestion_control.h
+++ b/include/opensm/osm_congestion_control.h
@@ -67,6 +67,8 @@
#define OSM_CC_DEFAULT_MAX_OUTSTANDING_QUERIES 500
+#define OSM_CC_TIMEOUT_COUNT_THRESHOLD 3
+
/****s* OpenSM: CongestionControl/osm_congestion_control_t
* This object should be treated as opaque and should
* be manipulated only through the provided functions.
diff --git a/include/opensm/osm_port.h b/include/opensm/osm_port.h
index 5fc186c..8e34b84 100644
--- a/include/opensm/osm_port.h
+++ b/include/opensm/osm_port.h
@@ -1163,6 +1163,8 @@ typedef struct osm_port {
osm_physp_t *p_physp;
cl_qlist_t mcm_list;
int flag;
+ unsigned int cc_timeout_count;
+ int cc_unavailable_flag;
void *priv;
} osm_port_t;
/*
@@ -1195,6 +1197,12 @@ typedef struct osm_port {
* flag
* Utility flag for port management
*
+* cc_timeout_count
+* Count number of times congestion control config times out.
+*
+* cc_unavailable_flag
+* Flag indicating if congestion control is not supported.
+*
* SEE ALSO
* Port, Physical Port, Physical Port Table
*********/
diff --git a/opensm/osm_congestion_control.c b/opensm/osm_congestion_control.c
index 8be333d..3a7be18 100644
--- a/opensm/osm_congestion_control.c
+++ b/opensm/osm_congestion_control.c
@@ -378,6 +378,9 @@ int osm_congestion_control_setup(struct osm_opensm *p_osm)
p_next = cl_qmap_next(p_next);
+ if (p_port->cc_unavailable_flag)
+ continue;
+
if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
if (status != IB_SUCCESS)
@@ -468,6 +471,18 @@ static void cc_rcv_mad(void *context, void *data)
goto Exit;
}
+ p_port->cc_timeout_count = 0;
+
+ if (p_cc_mad->header.status) {
+ if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD
+ || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR)
+ p_port->cc_unavailable_flag = 1;
+ cl_plock_release(&p_osm->lock);
+ goto Exit;
+ }
+ else
+ p_port->cc_unavailable_flag = 0;
+
if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
ib_sw_cong_setting_t *p_sw_cong_setting;
@@ -637,23 +652,56 @@ static void cc_mad_send_err_callback(void *bind_context,
{
osm_congestion_control_t *p_cc = bind_context;
osm_madw_context_t *p_madw_context = &p_madw->context;
+ osm_opensm_t *p_osm = p_cc->osm;
uint64_t node_guid = p_madw_context->cc_context.node_guid;
+ uint64_t port_guid = p_madw_context->cc_context.port_guid;
uint8_t port = p_madw_context->cc_context.port;
+ osm_port_t *p_port;
+ int log_flag = 1;
OSM_LOG_ENTER(p_cc->log);
- OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
- "attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
- "TID 0x%" PRIx64 "\n",
- ib_get_err_str(p_madw->status),
- p_madw->p_mad->attr_id,
- cl_ntoh16(p_madw->mad_addr.dest_lid),
- node_guid,
- port,
- cl_ntoh64(p_madw->p_mad->trans_id));
+ cl_plock_acquire(&p_osm->lock);
+
+ p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
+ if (!p_port) {
+ OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106A: "
+ "Port guid not in table 0x%" PRIx64 "\n",
+ port_guid);
+ cl_plock_release(&p_osm->lock);
+ goto Exit;
+ }
- p_cc->subn->subnet_initialization_error = TRUE;
+ /* If timed out before, don't bother logging again;
+ * we assume no CC support
+ */
+ if (p_madw->status == IB_TIMEOUT
+ && p_port->cc_timeout_count)
+ log_flag = 0;
+
+ if (log_flag)
+ OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
+ "attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
+ "TID 0x%" PRIx64 "\n",
+ ib_get_err_str(p_madw->status),
+ p_madw->p_mad->attr_id,
+ cl_ntoh16(p_madw->mad_addr.dest_lid),
+ node_guid,
+ port,
+ cl_ntoh64(p_madw->p_mad->trans_id));
+
+ if (p_madw->status == IB_TIMEOUT) {
+ p_port->cc_timeout_count++;
+ if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD
+ && !p_port->cc_unavailable_flag)
+ p_port->cc_unavailable_flag++;
+ }
+ else
+ p_cc->subn->subnet_initialization_error = TRUE;
+ cl_plock_release(&p_osm->lock);
+
+Exit:
osm_mad_pool_put(p_cc->mad_pool, p_madw);
decrement_outstanding_mads(p_cc);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] opensm: Manage ports that do not support congestion control
[not found] ` <1346262032.15926.22.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
@ 2012-09-16 10:11 ` Alex Netes
0 siblings, 0 replies; 2+ messages in thread
From: Alex Netes @ 2012-09-16 10:11 UTC (permalink / raw)
To: Albert Chu; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Hi Albert,
On 10:40 Wed 29 Aug , Albert Chu wrote:
> Support the ability to run congestion support in an environment
> where some IB hardware does not support congestion control.
>
> In addition, by flagging ports that do not support congestion
> control, we can reduce the amount of MAD traffic and log messages.
>
> Signed-off-by: Albert Chu <chu11-i2BcT+NCU+M@public.gmane.org>
> ---
Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-09-16 10:11 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-29 17:40 [PATCH] opensm: Manage ports that do not support congestion control Albert Chu
[not found] ` <1346262032.15926.22.camel-akkeaxHeDKRliZ7u+bvwcg@public.gmane.org>
2012-09-16 10:11 ` Alex Netes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).