* [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
@ 2009-11-02 11:50 Hal Rosenstock
[not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
0 siblings, 1 reply; 3+ messages in thread
From: Hal Rosenstock @ 2009-11-02 11:50 UTC (permalink / raw)
To: sashak-smomgflXvOZWk0Htik3J/w; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Move some code out into separate handle_babbling_port routine
for readability/maintainability
Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 5790461..1621fbc 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
cl_ntoh16(source_lid), cl_ntoh64(trans_id));
}
+static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
+ uint32_t num_received,
+ boolean_t physp_change_trap,
+ osm_physp_t **pp_physp,
+ boolean_t *run_heavy_sweep,
+ uint64_t *event_wheel_timeout)
+{
+ if (print_num_received(num_received))
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+ "Received trap %u times consecutively\n",
+ num_received);
+ /* If the trap provides info about a bad port, mark it as unhealthy. */
+ if (physp_change_trap == TRUE) {
+ /* get the port */
+ *pp_physp = get_physp_by_lid_and_num(sm,
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+
+ if (!*pp_physp)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+ "Failed to find physical port by lid:%u num:%u\n",
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+ else {
+ /* When babbling port policy option is enabled and
+ Threshold for disabling a "babbling" port is exceeded */
+ if (sm->p_subn->opt.babbling_port_policy &&
+ num_received >= 250 &&
+ disable_port(sm, *pp_physp) == 0)
+ return 1;
+
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+ "Marking unhealthy physical port by lid:%u num:%u\n",
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+ /* check if the current state of the p_physp is healthy. If
+ it is - then this is a first change of state. Run a heavy sweep.
+ if it is not - no need to mark it again - just restart the timer. */
+ if (osm_physp_is_healthy(*pp_physp)) {
+ osm_physp_set_health(*pp_physp, FALSE);
+ /* Make sure we sweep again - force a heavy sweep. */
+ /* The sweep should be done only after the re-registration, or
+ else we'll be losing track of the timer. */
+ *run_heavy_sweep = TRUE;
+ }
+ /* If we are marking the port as unhealthy - we want to
+ keep this for a longer period of time than the
+ OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
+ OSM_DEFAULT_UNHEALTHY_TIMEOUT */
+ *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
+ }
+ }
+ return 0;
+}
+
/**********************************************************************
**********************************************************************/
static void
@@ -454,68 +509,11 @@ trap_rcv_process_request(IN osm_sm_t * sm, IN const osm_madw_t * const p_madw)
trap_key);
/* Now we know how many times it provided this trap */
- if (num_received > 10) {
- if (print_num_received(num_received))
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
- "Received trap %u times consecutively\n",
- num_received);
- /*
- * If the trap provides info about a bad port
- * we mark it as unhealthy.
- */
- if (physp_change_trap == TRUE) {
- /* get the port */
- p_physp = get_physp_by_lid_and_num(sm,
- cl_ntoh16
- (p_ntci->
- data_details.
- ntc_129_131.
- lid),
- port_num);
-
- if (!p_physp)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR,
- "ERR 3805: "
- "Failed to find physical port by lid:%u num:%u\n",
- cl_ntoh16(p_ntci->data_details.
- ntc_129_131.lid),
- p_ntci->data_details.
- ntc_129_131.port_num);
- else {
- /* When babbling port policy option is enabled and
- Threshold for disabling a "babbling" port is exceeded */
- if (sm->p_subn->opt.
- babbling_port_policy
- && num_received >= 250
- && disable_port(sm, p_physp) == 0)
- goto Exit;
-
- OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
- "Marking unhealthy physical port by lid:%u num:%u\n",
- cl_ntoh16(p_ntci->data_details.
- ntc_129_131.lid),
- p_ntci->data_details.
- ntc_129_131.port_num);
- /* check if the current state of the p_physp is healthy. If
- it is - then this is a first change of state. Run a heavy sweep.
- if it is not - no need to mark it again - just restart the timer. */
- if (osm_physp_is_healthy(p_physp)) {
- osm_physp_set_health(p_physp,
- FALSE);
- /* Make sure we sweep again - force a heavy sweep. */
- /* The sweep should be done only after the re-registration, or
- else we'll be losing track of the timer. */
- run_heavy_sweep = TRUE;
- }
- /* If we are marking the port as unhealthy - we want to
- keep this for a longer period of time than the
- OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
- OSM_DEFAULT_UNHEALTHY_TIMEOUT */
- event_wheel_timeout =
- OSM_DEFAULT_UNHEALTHY_TIMEOUT;
- }
- }
- }
+ if (num_received > 10 &&
+ handle_babbling_port(sm, p_ntci, num_received,
+ physp_change_trap, &p_physp,
+ &run_heavy_sweep, &event_wheel_timeout))
+ goto Exit;
/* restart the aging anyway */
/* If physp_change_trap is TRUE - then use a callback to unset
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 3+ messages in thread
[parent not found: <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>]
* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request [not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org> @ 2009-11-03 2:00 ` Sasha Khapyorsky 2009-11-03 13:50 ` Hal Rosenstock 0 siblings, 1 reply; 3+ messages in thread From: Sasha Khapyorsky @ 2009-11-03 2:00 UTC (permalink / raw) To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On 06:50 Mon 02 Nov , Hal Rosenstock wrote: > > Move some code out into separate handle_babbling_port routine > for readability/maintainability > > Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> > --- > diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c > index 5790461..1621fbc 100644 > --- a/opensm/opensm/osm_trap_rcv.c > +++ b/opensm/opensm/osm_trap_rcv.c > @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, > cl_ntoh16(source_lid), cl_ntoh64(trans_id)); > } > > +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci, > + uint32_t num_received, > + boolean_t physp_change_trap, > + osm_physp_t **pp_physp, > + boolean_t *run_heavy_sweep, > + uint64_t *event_wheel_timeout) > +{ > + if (print_num_received(num_received)) > + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " > + "Received trap %u times consecutively\n", > + num_received); > + /* If the trap provides info about a bad port, mark it as unhealthy. 
*/ > + if (physp_change_trap == TRUE) { > + /* get the port */ > + *pp_physp = get_physp_by_lid_and_num(sm, > + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), > + p_ntci->data_details.ntc_129_131.port_num); > + > + if (!*pp_physp) > + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " > + "Failed to find physical port by lid:%u num:%u\n", > + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), > + p_ntci->data_details.ntc_129_131.port_num); > + else { > + /* When babbling port policy option is enabled and > + Threshold for disabling a "babbling" port is exceeded */ > + if (sm->p_subn->opt.babbling_port_policy && > + num_received >= 250 && > + disable_port(sm, *pp_physp) == 0) > + return 1; > + > + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > + "Marking unhealthy physical port by lid:%u num:%u\n", > + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), > + p_ntci->data_details.ntc_129_131.port_num); > + /* check if the current state of the p_physp is healthy. If > + it is - then this is a first change of state. Run a heavy sweep. > + if it is not - no need to mark it again - just restart the timer. */ > + if (osm_physp_is_healthy(*pp_physp)) { > + osm_physp_set_health(*pp_physp, FALSE); > + /* Make sure we sweep again - force a heavy sweep. */ > + /* The sweep should be done only after the re-registration, or > + else we'll be losing track of the timer. */ > + *run_heavy_sweep = TRUE; > + } > + /* If we are marking the port as unhealthy - we want to > + keep this for a longer period of time than the > + OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the > + OSM_DEFAULT_UNHEALTHY_TIMEOUT */ > + *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT; > + } > + return 0; > +} > + It looks that you are moving the code as it is to a separate function where many flow switchers (run_heavy_sweep, event_wheel_timeout, pp_physp) are magically modified. What about something like below instead? 
Sasha >From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001 From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org> Date: Tue, 3 Nov 2009 03:46:54 +0200 Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation Consolidate noisy port handling code in shutup_noisy_port() function. As before it will try to disable port or to mark it unhealthy. Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org> --- opensm/opensm/osm_trap_rcv.c | 113 ++++++++++++++++++++---------------------- 1 files changed, 53 insertions(+), 60 deletions(-) diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c index 0ee4e77..dae892e 100644 --- a/opensm/opensm/osm_trap_rcv.c +++ b/opensm/opensm/osm_trap_rcv.c @@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) uint8_t payload[IB_SMP_DATA_SIZE]; osm_madw_context_t context; ib_port_info_t *pi = (ib_port_info_t *)payload; - int ret; /* select the nearest port to master opensm */ if (p->p_remote_physp && @@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) /* If trap 131, might want to disable peer port if available */ /* but peer port has been observed not to respond to SM requests */ - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: " - "Disabling physical port 0x%016" PRIx64 " num:%u\n", - cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num); - memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); /* Set port to disabled/down */ @@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; - ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p), - payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, - cl_hton32(osm_physp_get_port_num(p)), - CL_DISP_MSGID_NONE, &context); - if (ret) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " - "Request to set PortInfo failed\n"); - - return ret; + return osm_req_set(sm, osm_physp_get_dr_path_ptr(p), 
+ payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, + cl_hton32(osm_physp_get_port_num(p)), + CL_DISP_MSGID_NONE, &context); } static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, @@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, cl_ntoh16(source_lid), cl_ntoh64(trans_id)); } +static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port, + unsigned num) +{ + osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port); + if (!p) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " + "Failed to find physical port by lid:%u num:%u\n", + lid, port); + return -1; + } + + /* When babbling port policy option is enabled and + Threshold for disabling a "babbling" port is exceeded */ + if (sm->p_subn->opt.babbling_port_policy && num >= 250) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Disabling noisy physical port 0x%016" PRIx64 + ": lid %u, num %u\n", + cl_ntoh64(osm_physp_get_port_guid(p)), lid, port); + if (disable_port(sm, p)) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " + "Failed to disable.\n"); + else + return 1; + } + + /* check if the current state of the p_physp is healthy. If + it is - then this is a first change of state. Run a heavy sweep. 
*/ + if (osm_physp_is_healthy(p)) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Marking unhealthy physical port by lid:%u num:%u\n", + lid, port); + osm_physp_set_health(p, FALSE); + return 2; + } + return 0; +} /********************************************************************** **********************************************************************/ static void trap_rcv_process_request(IN osm_sm_t * sm, @@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, /* Now we know how many times it provided this trap */ if (num_received > 10) { if (print_num_received(num_received)) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Received trap %u times consecutively\n", num_received); /* @@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, * we mark it as unhealthy. */ if (physp_change_trap == TRUE) { - /* get the port */ - p_physp = get_physp_by_lid_and_num(sm, - cl_ntoh16 - (source_lid), - port_num); - - if (!p_physp) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, - "ERR 3805: " - "Failed to find physical port by lid:%u num:%u\n", - cl_ntoh16(source_lid), - port_num); - else { - /* When babbling port policy option is enabled and - Threshold for disabling a "babbling" port is exceeded */ - if (sm->p_subn->opt. - babbling_port_policy - && num_received >= 250 - && disable_port(sm, p_physp) == 0) - goto Exit; - - OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, - "Marking unhealthy physical port by lid:%u num:%u\n", - cl_ntoh16(source_lid), - port_num); - /* check if the current state of the p_physp is healthy. If - it is - then this is a first change of state. Run a heavy sweep. - if it is not - no need to mark it again - just restart the timer. */ - if (osm_physp_is_healthy(p_physp)) { - osm_physp_set_health(p_physp, - FALSE); - /* Make sure we sweep again - force a heavy sweep. */ - /* The sweep should be done only after the re-registration, or - else we'll be losing track of the timer. 
*/ - run_heavy_sweep = TRUE; - } - /* If we are marking the port as unhealthy - we want to - keep this for a longer period of time than the - OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the - OSM_DEFAULT_UNHEALTHY_TIMEOUT */ - event_wheel_timeout = - OSM_DEFAULT_UNHEALTHY_TIMEOUT; - } + int ret = shutup_noisy_port(sm, + cl_ntoh16(source_lid), + port_num, + num_received); + if (ret == 1) /* port disabled */ + goto Exit; + else if (ret == 2) /* unhealthy - run sweep */ + run_heavy_sweep = TRUE; + /* in any case increase timeout interval */ + event_wheel_timeout = + OSM_DEFAULT_UNHEALTHY_TIMEOUT; } } @@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, if (num_received > 10 && run_heavy_sweep == FALSE) { if (print_num_received(num_received)) OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, - "Continuously received this trap %u times. Ignoring\n", - num_received); + "Ignoring noisy traps.\n"); goto Exit; } } -- 1.6.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request 2009-11-03 2:00 ` Sasha Khapyorsky @ 2009-11-03 13:50 ` Hal Rosenstock 0 siblings, 0 replies; 3+ messages in thread From: Hal Rosenstock @ 2009-11-03 13:50 UTC (permalink / raw) To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On Mon, Nov 2, 2009 at 9:00 PM, Sasha Khapyorsky <sashak@voltaire.com> wrote: > On 06:50 Mon 02 Nov , Hal Rosenstock wrote: >> >> Move some code out into separate handle_babbling_port routine >> for readability/maintainability >> >> Signed-off-by: Hal Rosenstock <hal.rosenstock@gmail.com> >> --- >> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c >> index 5790461..1621fbc 100644 >> --- a/opensm/opensm/osm_trap_rcv.c >> +++ b/opensm/opensm/osm_trap_rcv.c >> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, >> cl_ntoh16(source_lid), cl_ntoh64(trans_id)); >> } >> >> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci, >> + uint32_t num_received, >> + boolean_t physp_change_trap, >> + osm_physp_t **pp_physp, >> + boolean_t *run_heavy_sweep, >> + uint64_t *event_wheel_timeout) >> +{ >> + if (print_num_received(num_received)) >> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " >> + "Received trap %u times consecutively\n", >> + num_received); >> + /* If the trap provides info about a bad port, mark it as unhealthy. 
*/ >> + if (physp_change_trap == TRUE) { >> + /* get the port */ >> + *pp_physp = get_physp_by_lid_and_num(sm, >> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), >> + p_ntci->data_details.ntc_129_131.port_num); >> + >> + if (!*pp_physp) >> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " >> + "Failed to find physical port by lid:%u num:%u\n", >> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), >> + p_ntci->data_details.ntc_129_131.port_num); >> + else { >> + /* When babbling port policy option is enabled and >> + Threshold for disabling a "babbling" port is exceeded */ >> + if (sm->p_subn->opt.babbling_port_policy && >> + num_received >= 250 && >> + disable_port(sm, *pp_physp) == 0) >> + return 1; >> + >> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, >> + "Marking unhealthy physical port by lid:%u num:%u\n", >> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid), >> + p_ntci->data_details.ntc_129_131.port_num); >> + /* check if the current state of the p_physp is healthy. If >> + it is - then this is a first change of state. Run a heavy sweep. >> + if it is not - no need to mark it again - just restart the timer. */ >> + if (osm_physp_is_healthy(*pp_physp)) { >> + osm_physp_set_health(*pp_physp, FALSE); >> + /* Make sure we sweep again - force a heavy sweep. */ >> + /* The sweep should be done only after the re-registration, or >> + else we'll be losing track of the timer. */ >> + *run_heavy_sweep = TRUE; >> + } >> + /* If we are marking the port as unhealthy - we want to >> + keep this for a longer period of time than the >> + OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the >> + OSM_DEFAULT_UNHEALTHY_TIMEOUT */ >> + *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT; >> + } >> + return 0; >> +} >> + > > It looks that you are moving the code as it is to a separate function > where many flow switchers (run_heavy_sweep, event_wheel_timeout, > pp_physp) are magically modified. > > What about something like below instead? 
Reviewed-by: Hal Rosenstock <hal.rosenstock@gmail.com> > > Sasha > > > >From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001 > From: Sasha Khapyorsky <sashak@voltaire.com> > Date: Tue, 3 Nov 2009 03:46:54 +0200 > Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation > > Consolidate noisy port handling code in shutup_noisy_port() function. > As before it will try to disable port or to mark it unhealthy. > > Signed-off-by: Sasha Khapyorsky <sashak@voltaire.com> > --- > opensm/opensm/osm_trap_rcv.c | 113 ++++++++++++++++++++---------------------- > 1 files changed, 53 insertions(+), 60 deletions(-) > > diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c > index 0ee4e77..dae892e 100644 > --- a/opensm/opensm/osm_trap_rcv.c > +++ b/opensm/opensm/osm_trap_rcv.c > @@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) > uint8_t payload[IB_SMP_DATA_SIZE]; > osm_madw_context_t context; > ib_port_info_t *pi = (ib_port_info_t *)payload; > - int ret; > > /* select the nearest port to master opensm */ > if (p->p_remote_physp && > @@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) > /* If trap 131, might want to disable peer port if available */ > /* but peer port has been observed not to respond to SM requests */ > > - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: " > - "Disabling physical port 0x%016" PRIx64 " num:%u\n", > - cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num); > - > memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); > > /* Set port to disabled/down */ > @@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) > context.pi_context.light_sweep = FALSE; > context.pi_context.active_transition = FALSE; > > - ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p), > - payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, > - cl_hton32(osm_physp_get_port_num(p)), > - CL_DISP_MSGID_NONE, &context); > - if (ret) > - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " > - 
"Request to set PortInfo failed\n"); > - > - return ret; > + return osm_req_set(sm, osm_physp_get_dr_path_ptr(p), > + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, > + cl_hton32(osm_physp_get_port_num(p)), > + CL_DISP_MSGID_NONE, &context); > } > > static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, > @@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, > cl_ntoh16(source_lid), cl_ntoh64(trans_id)); > } > > +static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port, > + unsigned num) > +{ > + osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port); > + if (!p) { > + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " > + "Failed to find physical port by lid:%u num:%u\n", > + lid, port); > + return -1; > + } > + > + /* When babbling port policy option is enabled and > + Threshold for disabling a "babbling" port is exceeded */ > + if (sm->p_subn->opt.babbling_port_policy && num >= 250) { > + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > + "Disabling noisy physical port 0x%016" PRIx64 > + ": lid %u, num %u\n", > + cl_ntoh64(osm_physp_get_port_guid(p)), lid, port); > + if (disable_port(sm, p)) > + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " > + "Failed to disable.\n"); Might want to include port information in this log message. -- Hal > + else > + return 1; > + } > + > + /* check if the current state of the p_physp is healthy. If > + it is - then this is a first change of state. Run a heavy sweep. 
*/ > + if (osm_physp_is_healthy(p)) { > + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > + "Marking unhealthy physical port by lid:%u num:%u\n", > + lid, port); > + osm_physp_set_health(p, FALSE); > + return 2; > + } > + return 0; > +} > /********************************************************************** > **********************************************************************/ > static void trap_rcv_process_request(IN osm_sm_t * sm, > @@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, > /* Now we know how many times it provided this trap */ > if (num_received > 10) { > if (print_num_received(num_received)) > - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " > + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > "Received trap %u times consecutively\n", > num_received); > /* > @@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, > * we mark it as unhealthy. > */ > if (physp_change_trap == TRUE) { > - /* get the port */ > - p_physp = get_physp_by_lid_and_num(sm, > - cl_ntoh16 > - (source_lid), > - port_num); > - > - if (!p_physp) > - OSM_LOG(sm->p_log, OSM_LOG_ERROR, > - "ERR 3805: " > - "Failed to find physical port by lid:%u num:%u\n", > - cl_ntoh16(source_lid), > - port_num); > - else { > - /* When babbling port policy option is enabled and > - Threshold for disabling a "babbling" port is exceeded */ > - if (sm->p_subn->opt. > - babbling_port_policy > - && num_received >= 250 > - && disable_port(sm, p_physp) == 0) > - goto Exit; > - > - OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > - "Marking unhealthy physical port by lid:%u num:%u\n", > - cl_ntoh16(source_lid), > - port_num); > - /* check if the current state of the p_physp is healthy. If > - it is - then this is a first change of state. Run a heavy sweep. > - if it is not - no need to mark it again - just restart the timer. */ > - if (osm_physp_is_healthy(p_physp)) { > - osm_physp_set_health(p_physp, > - FALSE); > - /* Make sure we sweep again - force a heavy sweep. 
*/ > - /* The sweep should be done only after the re-registration, or > - else we'll be losing track of the timer. */ > - run_heavy_sweep = TRUE; > - } > - /* If we are marking the port as unhealthy - we want to > - keep this for a longer period of time than the > - OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the > - OSM_DEFAULT_UNHEALTHY_TIMEOUT */ > - event_wheel_timeout = > - OSM_DEFAULT_UNHEALTHY_TIMEOUT; > - } > + int ret = shutup_noisy_port(sm, > + cl_ntoh16(source_lid), > + port_num, > + num_received); > + if (ret == 1) /* port disabled */ > + goto Exit; > + else if (ret == 2) /* unhealthy - run sweep */ > + run_heavy_sweep = TRUE; > + /* in any case increase timeout interval */ > + event_wheel_timeout = > + OSM_DEFAULT_UNHEALTHY_TIMEOUT; > } > } > > @@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, > if (num_received > 10 && run_heavy_sweep == FALSE) { > if (print_num_received(num_received)) > OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, > - "Continuously received this trap %u times. Ignoring\n", > - num_received); > + "Ignoring noisy traps.\n"); > goto Exit; > } > } > -- > 1.6.5.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-11-03 13:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-02 11:50 [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request Hal Rosenstock
[not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
2009-11-03 2:00 ` Sasha Khapyorsky
2009-11-03 13:50 ` Hal Rosenstock
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.