* [PATCH 2/5] opensm: change discovery order of switch data
[not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 11:05 ` Alex Netes
[not found] ` <1391425516-14462-2-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 11:05 ` [PATCH 3/5] opensm: Better handle topology changes in the fabric Alex Netes
` (3 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw)
To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes
Previously, upon receiving GetResp(NodeInfo) from a switch, the SM sent
Get(SwitchInfo) and Get(PortInfo) for all of its ports in parallel. Upon
receiving GetResp(PortInfo), the SM sends Get(PkeyTable). The problem is that
we need the SwitchInfo.PartEnforceCap value to calculate the max Pkey block,
so if one of the GetResp(PortInfo) responses arrives before
GetResp(SwitchInfo), this value won't be set yet.
The fix is to change the discovery order. Upon receiving GetResp(NodeInfo),
the SM sends Get(SwitchInfo). Upon receiving GetResp(SwitchInfo), the SM sends
Get(PortInfo port=0). If we don't get GetResp(PortInfo port=0), the SM will
drop the switch; otherwise the SM sends Get(PortInfo ExtPorts).
Moreover, the SM now queries ExtPortInfo and Pkeys only for non-Down
ports.
Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
opensm/osm_node_info_rcv.c | 43 ++++-------
opensm/osm_port_info_rcv.c | 165 +++++++++++++++++++++++++++++++++-----------
opensm/osm_sw_info_rcv.c | 43 ++++++++++++
3 files changed, 183 insertions(+), 68 deletions(-)
diff --git a/opensm/osm_node_info_rcv.c b/opensm/osm_node_info_rcv.c
index 4242924..b4e00f3 100644
--- a/opensm/osm_node_info_rcv.c
+++ b/opensm/osm_node_info_rcv.c
@@ -283,19 +283,13 @@ static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
osm_madw_context_t context;
osm_physp_t *physp;
ib_node_info_t *ni;
- unsigned port, num_ports;
+ unsigned port;
ib_api_status_t status;
int mlnx_epi_supported = 0;
ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
- if (ni->node_type == IB_NODE_TYPE_SWITCH) {
- port = 0;
- num_ports = osm_node_get_num_physp(node);
- } else {
- port = ib_node_info_get_local_port_num(ni);
- num_ports = port + 1;
- }
+ port = ib_node_info_get_local_port_num(ni);
if (sm->p_subn->opt.fdr10)
mlnx_epi_supported = is_mlnx_ext_port_info_supported(ni->device_id);
@@ -309,25 +303,23 @@ static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
context.pi_context.active_transition = FALSE;
context.pi_context.client_rereg = FALSE;
- for (; port < num_ports; port++) {
- status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
- IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+ status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
+ IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+ TRUE, 0, CL_DISP_MSGID_NONE, &context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
+ "Failure initiating PortInfo request (%s)\n",
+ ib_get_err_str(status));
+ if (mlnx_epi_supported) {
+ status = osm_req_get(sm,
+ osm_physp_get_dr_path_ptr(physp),
+ IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+ cl_hton32(port),
TRUE, 0, CL_DISP_MSGID_NONE, &context);
if (status != IB_SUCCESS)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
- "Failure initiating PortInfo request (%s)\n",
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
+ "Failure initiating MLNX ExtPortInfo request (%s)\n",
ib_get_err_str(status));
- if (mlnx_epi_supported) {
- status = osm_req_get(sm,
- osm_physp_get_dr_path_ptr(physp),
- IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
- cl_hton32(port), TRUE, 0,
- CL_DISP_MSGID_NONE, &context);
- if (status != IB_SUCCESS)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
- "Failure initiating MLNX ExtPortInfo request (%s)\n",
- ib_get_err_str(status));
- }
}
}
@@ -566,9 +558,6 @@ static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
"Failure initiating SwitchInfo request (%s)\n",
ib_get_err_str(status));
- if (p_node->discovery_count == 1)
- ni_rcv_get_port_info(sm, p_node, p_madw);
-
OSM_LOG_EXIT(sm->p_log);
}
diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c
index 5c12989..b3d4bd3 100644
--- a/opensm/osm_port_info_rcv.c
+++ b/opensm/osm_port_info_rcv.c
@@ -199,18 +199,75 @@ static void pi_rcv_process_endport(IN osm_sm_t * sm, IN osm_physp_t * p_physp,
/**********************************************************************
The plock must be held before calling this function.
**********************************************************************/
-static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
- IN osm_physp_t * p_physp,
- IN ib_port_info_t * p_pi)
+static void pi_rcv_process_switch_port0(IN osm_sm_t * sm,
+ IN osm_node_t * p_node,
+ IN osm_physp_t * p_physp,
+ IN ib_port_info_t * p_pi)
+{
+ ib_api_status_t status;
+ osm_madw_context_t context;
+ uint8_t port, num_ports;
+
+ OSM_LOG_ENTER(sm->p_log);
+
+ num_ports = osm_node_get_num_physp(p_node);
+
+ context.pi_context.node_guid = osm_node_get_node_guid(p_node);
+ context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+ context.pi_context.set_method = FALSE;
+ context.pi_context.light_sweep = FALSE;
+ context.pi_context.active_transition = FALSE;
+ context.pi_context.client_rereg = FALSE;
+
+ for (port = 1; port < num_ports; port++) {
+ status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
+ IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+ FALSE,
+ ib_port_info_get_m_key(&p_physp->port_info),
+ CL_DISP_MSGID_NONE, &context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F16: "
+ "Failure initiating PortInfo request (%s)\n",
+ ib_get_err_str(status));
+ }
+
+ if (p_physp->need_update)
+ sm->p_subn->ignore_existing_lfts = TRUE;
+
+ pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);
+
+ /*
+ Update the PortInfo attribute.
+ */
+ osm_physp_set_port_info(p_physp, p_pi, sm);
+
+ /* Determine if base switch port 0 */
+ if (p_node->sw &&
+ !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
+ /* PortState is not used on BSP0 but just in case it is DOWN */
+ p_physp->port_info = *p_pi;
+ pi_rcv_process_endport(sm, p_physp, p_pi);
+ OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
+ IN osm_node_t * p_node,
+ IN osm_physp_t * p_physp,
+ IN ib_port_info_t * p_pi)
{
ib_api_status_t status = IB_SUCCESS;
osm_madw_context_t context;
- osm_physp_t *p_remote_physp;
+ osm_physp_t *p_remote_physp, *physp0;
osm_node_t *p_remote_node;
+ ib_net64_t m_key;
unsigned data_vls;
uint8_t port_num;
uint8_t remote_port_num;
osm_dr_path_t path;
+ int mlnx_epi_supported = 0;
OSM_LOG_ENTER(sm->p_log);
@@ -220,10 +277,14 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
then ask for NodeInfo. Ignore the switch management port.
*/
port_num = osm_physp_get_port_num(p_physp);
+
+ if (sm->p_subn->opt.fdr10)
+ mlnx_epi_supported = is_mlnx_ext_port_info_supported(p_node->node_info.device_id);
+
/* if in_sweep_hop_0 is TRUE, then this means the SM is on the switch,
and we got switchInfo of our local switch. Do not continue
probing through the switch. */
- if (port_num != 0 && sm->p_subn->in_sweep_hop_0 == FALSE) {
+ if (sm->p_subn->in_sweep_hop_0 == FALSE) {
switch (ib_port_info_get_port_state(p_pi)) {
case IB_LINK_DOWN:
p_remote_physp = osm_physp_get_remote(p_physp);
@@ -259,6 +320,26 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
case IB_LINK_INIT:
case IB_LINK_ARMED:
case IB_LINK_ACTIVE:
+ physp0 = osm_node_get_physp_ptr(p_node, 0);
+ if (mlnx_epi_supported) {
+ m_key = ib_port_info_get_m_key(&physp0->port_info);
+
+ context.pi_context.node_guid = osm_node_get_node_guid(p_node);
+ context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+ context.pi_context.set_method = FALSE;
+ context.pi_context.light_sweep = FALSE;
+ context.pi_context.active_transition = FALSE;
+ context.pi_context.client_rereg = FALSE;
+ status = osm_req_get(sm,
+ osm_physp_get_dr_path_ptr(p_physp),
+ IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+ cl_hton32(port_num), FALSE, m_key,
+ CL_DISP_MSGID_NONE, &context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F11: "
+ "Failure initiating MLNX ExtPortInfo request (%s)\n",
+ ib_get_err_str(status));
+ }
/*
To avoid looping forever, only probe the port if it
is NOT the port that responded to the SMP.
@@ -316,46 +397,35 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
}
if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
- p_node->sw->need_update == 1 && port_num != 0)
+ p_node->sw->need_update == 1)
p_node->sw->need_update = 0;
if (p_physp->need_update)
sm->p_subn->ignore_existing_lfts = TRUE;
- if (port_num == 0)
- pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);
-
/*
Update the PortInfo attribute.
*/
osm_physp_set_port_info(p_physp, p_pi, sm);
- if (port_num == 0) {
- /* Determine if base switch port 0 */
- if (p_node->sw &&
- !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
- /* PortState is not used on BSP0 but just in case it is DOWN */
- p_physp->port_info = *p_pi;
- pi_rcv_process_endport(sm, p_physp, p_pi);
- } else {
- if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
- goto Exit;
+ if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
+ goto Exit;
- p_remote_physp = osm_physp_get_remote(p_physp);
- if (p_remote_physp) {
- p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
- if (p_remote_node->sw) {
- data_vls = 1U << (ib_port_info_get_op_vls(p_pi) - 1);
- if (data_vls >= IB_MAX_NUM_VLS)
- data_vls = IB_MAX_NUM_VLS - 1;
- if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) {
- OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
- "Setting switch port minimal data VLs to:%u defined by node:0x%"
- PRIx64 ", port:%u\n", data_vls,
- cl_ntoh64(osm_node_get_node_guid(p_node)),
- port_num);
- sm->p_subn->min_sw_data_vls = data_vls;
- }
+ p_remote_physp = osm_physp_get_remote(p_physp);
+ if (p_remote_physp) {
+ p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
+ if (p_remote_node->sw) {
+ data_vls = 1U << (ib_port_info_get_op_vls(p_pi) - 1);
+ if (data_vls >= IB_MAX_NUM_VLS)
+ data_vls = IB_MAX_NUM_VLS - 1;
+ if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) {
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+ "Setting switch port minimal data VLs "
+ "to:%u defined by node:0x%"
+ PRIx64 ", port:%u\n", data_vls,
+ cl_ntoh64(osm_node_get_node_guid(p_node)),
+ port_num);
+ sm->p_subn->min_sw_data_vls = data_vls;
}
}
}
@@ -469,6 +539,18 @@ static void pi_rcv_get_pkey_slvl_vla_tables(IN osm_sm_t * sm,
OSM_LOG_EXIT(sm->p_log);
}
+static int osm_pi_rcv_update_self(IN osm_sm_t *sm, IN osm_physp_t *p_physp,
+ IN ib_port_info_t *p_pi)
+{
+ if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
+ return 0;
+
+ if (sm->p_subn->need_update || p_physp->need_update > 1)
+ return 1;
+
+ return 0;
+}
+
static void pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * p_node,
IN uint8_t port_num, IN osm_madw_t * p_madw)
{
@@ -654,11 +736,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
osm_dr_path_init(p_dr_path, p_smp->hop_count,
p_smp->initial_path);
- /* if port just inited or reached INIT state (external reset)
- request update for port related tables */
- p_physp->need_update =
- (ib_port_info_get_port_state(p_pi) == IB_LINK_INIT ||
- p_physp->need_update > 1) ? 1 : 0;
+ p_physp->need_update = osm_pi_rcv_update_self(sm, p_physp, p_pi);
switch (osm_node_get_type(p_node)) {
case IB_NODE_TYPE_CA:
@@ -675,7 +753,12 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
p_port->discovery_count++;
p_node->physp_discovered[port_num] = 1;
}
- pi_rcv_process_switch_port(sm, p_node, p_physp, p_pi);
+ if (port_num == 0)
+ pi_rcv_process_switch_port0(sm, p_node,
+ p_physp, p_pi);
+ else
+ pi_rcv_process_switch_ext_port(sm, p_node,
+ p_physp, p_pi);
break;
default:
OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: "
@@ -688,7 +771,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
/*
Get the tables on the physp.
*/
- if (p_physp->need_update || sm->p_subn->need_update)
+ if (p_physp->need_update)
pi_rcv_get_pkey_slvl_vla_tables(sm, p_node, p_physp);
}
diff --git a/opensm/osm_sw_info_rcv.c b/opensm/osm_sw_info_rcv.c
index 02f6ab2..98f7b81 100644
--- a/opensm/osm_sw_info_rcv.c
+++ b/opensm/osm_sw_info_rcv.c
@@ -333,6 +333,48 @@ static boolean_t si_rcv_process_existing(IN osm_sm_t * sm,
return is_change_detected;
}
+static void si_rcv_get_sp0_info(IN osm_sm_t * sm, IN osm_node_t * node)
+{
+ osm_madw_context_t context;
+ osm_physp_t *physp;
+ ib_api_status_t status;
+ int mlnx_epi_supported = 0;
+
+ physp = osm_node_get_physp_ptr(node, 0);
+
+ context.pi_context.node_guid = osm_node_get_node_guid(node);
+ context.pi_context.port_guid = osm_physp_get_port_guid(physp);
+ context.pi_context.set_method = FALSE;
+ context.pi_context.light_sweep = FALSE;
+ context.pi_context.active_transition = FALSE;
+ context.pi_context.client_rereg = FALSE;
+
+ status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
+ IB_MAD_ATTR_PORT_INFO, 0, TRUE, 0,
+ CL_DISP_MSGID_NONE, &context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3611: "
+ "Failure initiating PortInfo request (%s)\n",
+ ib_get_err_str(status));
+
+ if (ib_switch_info_is_enhanced_port0(&node->sw->switch_info) &&
+ sm->p_subn->opt.fdr10) {
+ mlnx_epi_supported = is_mlnx_ext_port_info_supported(node->node_info.device_id);
+ if (mlnx_epi_supported) {
+ status = osm_req_get(sm,
+ osm_physp_get_dr_path_ptr(physp),
+ IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+ 0, TRUE, 0,
+ CL_DISP_MSGID_NONE, &context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3616: "
+ "Failure initiating MLNX ExtPortInfo request (%s)\n",
+ ib_get_err_str(status));
+ }
+ }
+
+}
+
void osm_si_rcv_process(IN void *context, IN void *data)
{
osm_sm_t *sm = context;
@@ -390,6 +432,7 @@ void osm_si_rcv_process(IN void *context, IN void *data)
/* we might get back a request for signaling change was detected */
sm->p_subn->force_heavy_sweep = TRUE;
+ si_rcv_get_sp0_info(sm, p_node);
CL_PLOCK_RELEASE(sm->p_lock);
Exit:
OSM_LOG_EXIT(sm->p_log);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/5] opensm: Better handle topology changes in the fabric
[not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 11:05 ` [PATCH 2/5] opensm: change discovery order of switch data Alex Netes
@ 2014-02-03 11:05 ` Alex Netes
[not found] ` <1391425516-14462-3-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 11:05 ` [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep Alex Netes
` (2 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw)
To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes
The patch tries to solve the following problem:
When a newly discovered switch is rebooted during the configuration cycle,
the SM ends up setting all Initialized ports to Active, but the configuration
on the switch, such as Pkey tables, QoS, etc., might be incorrect.
The fix solves this in two steps. First, turn on the need_update flag when
a switch's StateChange bit is detected ON or a CA's neighbor switch has its
StateChange bit ON. Second, clear the StateChange bit on the switches before
any configuration is done. This ensures that we don't miss changes in
the fabric. If a switch was rebooted during a sweep, we will detect it
in a subsequent sweep and configure all of its neighbors from scratch.
Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
include/iba/ib_types.h | 60 ++++++++++++++++++++++++++++++++++++++++
opensm/osm_port_info_rcv.c | 27 +++++++++++++++++-
opensm/osm_state_mgr.c | 66 ++++++++++++++++++++++++++++++++++++++++++++
opensm/osm_ucast_mgr.c | 15 ++-------
4 files changed, 156 insertions(+), 12 deletions(-)
diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h
index a5136d4..249ee16 100644
--- a/include/iba/ib_types.h
+++ b/include/iba/ib_types.h
@@ -6507,6 +6507,34 @@ ib_switch_info_clear_state_change(IN ib_switch_info_t * const p_si)
* SEE ALSO
*********/
+/****f* IBA Base: Types/ib_switch_info_set_state_change
+* NAME
+* ib_switch_info_set_state_change
+*
+* DESCRIPTION
+* Sets the PortStateChange bit in the SwitchInfo attribute.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_switch_info_set_state_change(IN ib_switch_info_t * const p_si)
+{
+ p_si->life_state = (uint8_t) ((p_si->life_state & ~IB_SWITCH_PSC) | IB_SWITCH_PSC);
+}
+
+/*
+* PARAMETERS
+* p_si
+* [in] Pointer to a SwitchInfo attribute.
+*
+* RETURN VALUES
+* None
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
/****f* IBA Base: Types/ib_switch_info_get_opt_sl2vlmapping
* NAME
* ib_switch_info_get_state_opt_sl2vlmapping
@@ -6535,6 +6563,38 @@ ib_switch_info_get_opt_sl2vlmapping(IN const ib_switch_info_t * const p_si)
* SEE ALSO
*********/
+/****f* IBA Base: Types/ib_switch_info_set_life_time
+* NAME
+* ib_switch_info_set_life_time
+*
+* DESCRIPTION
+* Sets the value of LifeTimeValue.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_switch_info_set_life_time(IN ib_switch_info_t * const p_si,
+ IN const uint8_t life_time_val)
+{
+ p_si->life_state = (p_si->life_state & 0x1f) |
+ (life_time_val << 3);
+}
+
+/*
+* PARAMETERS
+* p_si
+* [in] Pointer to a SwitchInfo attribute.
+* life_time_val
+* [in] LifeTimeValue.
+*
+* RETURN VALUES
+* None.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
/****f* IBA Base: Types/ib_switch_info_is_enhanced_port0
* NAME
* ib_switch_info_is_enhanced_port0
diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c
index b3d4bd3..d813f1a 100644
--- a/opensm/osm_port_info_rcv.c
+++ b/opensm/osm_port_info_rcv.c
@@ -397,6 +397,7 @@ static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
}
if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
+ !ib_switch_info_get_state_change(&p_node->sw->switch_info) &&
p_node->sw->need_update == 1)
p_node->sw->need_update = 0;
@@ -545,7 +546,8 @@ static int osm_pi_rcv_update_self(IN osm_sm_t *sm, IN osm_physp_t *p_physp,
if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
return 0;
- if (sm->p_subn->need_update || p_physp->need_update > 1)
+ if (sm->p_subn->need_update || p_physp->need_update > 1 ||
+ ib_port_info_get_port_state(p_pi) == IB_LINK_INIT)
return 1;
return 0;
@@ -608,6 +610,28 @@ static void pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * p_node,
OSM_LOG_EXIT(sm->p_log);
}
+static int osm_pi_rcv_update_neighbor(IN osm_physp_t *p_physp)
+{
+ osm_physp_t *p_rem_physp = p_physp->p_remote_physp;
+ osm_node_t *p_node;
+
+ /*
+ * Our own port - this is the only case where CA port
+ * is discovered before its' neighbor port
+ */
+ if (!p_rem_physp)
+ return p_physp->need_update;
+
+ p_node = osm_physp_get_node_ptr(p_rem_physp);
+ CL_ASSERT(p_node);
+
+ /* CA/RTR to CA/RTR connection */
+ if (!p_node->sw)
+ return p_physp->need_update;
+
+ return (ib_switch_info_get_state_change(&p_node->sw->switch_info) ? 1 : p_physp->need_update);
+}
+
void osm_pi_rcv_process(IN void *context, IN void *data)
{
osm_sm_t *sm = context;
@@ -745,6 +769,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
p_port->discovery_count++;
p_node->physp_discovered[port_num] = 1;
}
+ p_physp->need_update = osm_pi_rcv_update_neighbor(p_physp);
pi_rcv_process_ca_or_router_port(sm, p_node, p_physp,
p_pi);
break;
diff --git a/opensm/osm_state_mgr.c b/opensm/osm_state_mgr.c
index c86627d..5080b22 100644
--- a/opensm/osm_state_mgr.c
+++ b/opensm/osm_state_mgr.c
@@ -531,6 +531,60 @@ static void query_sm_info(cl_map_item_t * item, void *cxt)
ib_get_err_str(ret));
}
+static void state_mgr_reset_state_change_bit(IN cl_map_item_t * obj,
+ IN void *context)
+{
+ osm_madw_context_t mad_context;
+ osm_switch_t *p_sw = (osm_switch_t *) obj;
+ osm_sm_t *sm = context;
+ osm_node_t *p_node;
+ osm_physp_t *p_physp;
+ osm_dr_path_t *p_path;
+ ib_api_status_t status;
+ ib_switch_info_t si;
+
+ OSM_LOG_ENTER(sm->p_log);
+
+ CL_ASSERT(p_sw);
+
+ p_node = p_sw->p_node;
+
+ CL_ASSERT(p_node);
+
+ p_physp = osm_node_get_physp_ptr(p_node, 0);
+ p_path = osm_physp_get_dr_path_ptr(p_physp);
+
+ if (!ib_switch_info_get_state_change(&p_sw->switch_info))
+ goto exit;
+
+ si = p_sw->switch_info;
+
+ ib_switch_info_set_state_change(&si);
+
+ OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+ "Resetting PortStateChange on switch GUID 0x%016" PRIx64 "\n",
+ cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+ mad_context.si_context.light_sweep = FALSE;
+ mad_context.si_context.node_guid = osm_node_get_node_guid(p_node);
+ mad_context.si_context.set_method = TRUE;
+ mad_context.si_context.lft_top_change = FALSE;
+
+ status = osm_req_set(sm, p_path, (uint8_t *) &si,
+ sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
+ 0, FALSE,
+ ib_port_info_get_m_key(&p_physp->port_info),
+ CL_DISP_MSGID_NONE, &mad_context);
+
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332A: "
+ "Sending SwitchInfo attribute failed (%s)\n",
+ ib_get_err_str(status));
+
+exit:
+ OSM_LOG_EXIT(sm->p_log);
+}
+
static void state_mgr_update_node_desc(IN cl_map_item_t * obj, IN void *context)
{
osm_madw_context_t mad_context;
@@ -576,6 +630,14 @@ exit:
OSM_LOG_EXIT(sm->p_log);
}
+void osm_reset_switch_state_change_bit(IN osm_opensm_t *osm)
+{
+ CL_PLOCK_ACQUIRE(&osm->lock);
+ cl_qmap_apply_func(&osm->subn.sw_guid_tbl, state_mgr_reset_state_change_bit,
+ &osm->sm);
+ CL_PLOCK_RELEASE(&osm->lock);
+}
+
void osm_update_node_desc(IN osm_opensm_t *osm)
{
CL_PLOCK_ACQUIRE(&osm->lock);
@@ -1340,6 +1402,10 @@ repeat_discovery:
if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);
+ osm_reset_switch_state_change_bit(sm->p_subn->p_osm);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
+
osm_pkey_mgr_process(sm->p_subn->p_osm);
/* try to restore SA DB (this should be before lid_mgr
diff --git a/opensm/osm_ucast_mgr.c b/opensm/osm_ucast_mgr.c
index f53e288..8194307 100644
--- a/opensm/osm_ucast_mgr.c
+++ b/opensm/osm_ucast_mgr.c
@@ -938,18 +938,11 @@ static void ucast_mgr_set_fwd_top(IN cl_map_item_t * p_map_item,
} else
context.si_context.lft_top_change = FALSE;
- /* check to see if the change state bit is on. If it is - then we
- need to clear it. */
- if (ib_switch_info_get_state_change(&si))
- life_state = ((p_mgr->p_subn->opt.packet_life_time << 3)
- | (si.life_state & IB_SWITCH_PSC)) & 0xfc;
- else
- life_state = (p_mgr->p_subn->opt.packet_life_time << 3) & 0xf8;
-
- if (life_state != si.life_state || ib_switch_info_get_state_change(&si)) {
+ life_state = si.life_state;
+ ib_switch_info_set_life_time(&si, p_mgr->p_subn->opt.packet_life_time);
+
+ if (life_state != si.life_state)
set_swinfo_require = TRUE;
- si.life_state = life_state;
- }
if (set_swinfo_require) {
OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 11+ messages in thread