public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
To: Hal Rosenstock <hnrose-Wuw85uim5zDR7s880joybQ@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
Date: Tue, 3 Nov 2009 04:00:28 +0200	[thread overview]
Message-ID: <20091103020028.GB17404@me> (raw)
In-Reply-To: <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>

On 06:50 Mon 02 Nov     , Hal Rosenstock wrote:
> 
> Move some code out into separate handle_babbling_port routine
> for readability/maintainability 
> 
> Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
> index 5790461..1621fbc 100644
> --- a/opensm/opensm/osm_trap_rcv.c
> +++ b/opensm/opensm/osm_trap_rcv.c
> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
>  			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
>  }
>  
> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
> +				uint32_t num_received,
> +				boolean_t physp_change_trap,
> +				osm_physp_t **pp_physp,
> +				boolean_t *run_heavy_sweep,
> +				uint64_t *event_wheel_timeout)
> +{
> +	if (print_num_received(num_received))
> +		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
> +			"Received trap %u times consecutively\n",
> +			num_received);
> +	/* If the trap provides info about a bad port, mark it as unhealthy. */
> +	if (physp_change_trap == TRUE) {
> +		/* get the port */
> +		*pp_physp = get_physp_by_lid_and_num(sm,
> +						     cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +						     p_ntci->data_details.ntc_129_131.port_num);
> +
> +		if (!*pp_physp)
> +			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
> +				"Failed to find physical port by lid:%u num:%u\n",
> +				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +				p_ntci->data_details.ntc_129_131.port_num);
> +		else {
> +			/* When babbling port policy option is enabled and
> +			   Threshold for disabling a "babbling" port is exceeded */
> +			if (sm->p_subn->opt.babbling_port_policy &&
> +			    num_received >= 250 &&
> +			    disable_port(sm, *pp_physp) == 0)
> +				return 1;
> +
> +			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> +				"Marking unhealthy physical port by lid:%u num:%u\n",
> +				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +				p_ntci->data_details.ntc_129_131.port_num);
> +			/* check if the current state of the p_physp is healthy. If
> +			   it is - then this is a first change of state. Run a heavy sweep.
> +			   if it is not - no need to mark it again - just restart the timer. */
> +			if (osm_physp_is_healthy(*pp_physp)) {
> +				osm_physp_set_health(*pp_physp, FALSE);
> +				/* Make sure we sweep again - force a heavy sweep. */
> +				/* The sweep should be done only after the re-registration, or
> +				   else we'll be losing track of the timer. */
> +				*run_heavy_sweep = TRUE;
> +			}
> +			/* If we are marking the port as unhealthy - we want to
> +			   keep this for a longer period of time than the
> +			   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
> +			   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
> +			*event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> +	}
> +	return 0;
> +}
> +

It looks that you are moving the code as it is to a separate function
where many flow switchers (run_heavy_sweep, event_wheel_timeout,
pp_physp) are magically modified.

What about something like below instead?

Sasha


>From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
Date: Tue, 3 Nov 2009 03:46:54 +0200
Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation

Consolidate noisy port handling code in shutup_noisy_port() function.
As before it will try to disable port or to mark it unhealthy.

Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 opensm/opensm/osm_trap_rcv.c |  113 ++++++++++++++++++++----------------------
 1 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 0ee4e77..dae892e 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	uint8_t payload[IB_SMP_DATA_SIZE];
 	osm_madw_context_t context;
 	ib_port_info_t *pi = (ib_port_info_t *)payload;
-	int ret;
 
 	/* select the nearest port to master opensm */
 	if (p->p_remote_physp &&
@@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	/* If trap 131, might want to disable peer port if available */
 	/* but peer port has been observed not to respond to SM requests */
 
-	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
-		"Disabling physical port 0x%016" PRIx64 " num:%u\n",
-		cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
-
 	memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
 
 	/* Set port to disabled/down */
@@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	context.pi_context.light_sweep = FALSE;
 	context.pi_context.active_transition = FALSE;
 
-	ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
-			  payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
-			  cl_hton32(osm_physp_get_port_num(p)),
-			  CL_DISP_MSGID_NONE, &context);
-	if (ret)
-		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
-			"Request to set PortInfo failed\n");
-
-	return ret;
+	return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
+			   payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+			   cl_hton32(osm_physp_get_port_num(p)),
+			   CL_DISP_MSGID_NONE, &context);
 }
 
 static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
@@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
 			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
 }
 
+static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port,
+			     unsigned num)
+{
+	osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port);
+	if (!p) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+			"Failed to find physical port by lid:%u num:%u\n",
+			lid, port);
+		return -1;
+	}
+
+	/* When babbling port policy option is enabled and
+	   Threshold for disabling a "babbling" port is exceeded */
+	if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Disabling noisy physical port 0x%016" PRIx64
+			": lid %u, num %u\n",
+			cl_ntoh64(osm_physp_get_port_guid(p)), lid, port);
+		if (disable_port(sm, p))
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
+				"Failed to disable.\n");
+		else
+			return 1;
+	}
+
+	/* check if the current state of the p_physp is healthy. If
+	   it is - then this is a first change of state. Run a heavy sweep. */
+	if (osm_physp_is_healthy(p)) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Marking unhealthy physical port by lid:%u num:%u\n",
+			lid, port);
+		osm_physp_set_health(p, FALSE);
+		return 2;
+	}
+	return 0;
+}
 /**********************************************************************
  **********************************************************************/
 static void trap_rcv_process_request(IN osm_sm_t * sm,
@@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 		/* Now we know how many times it provided this trap */
 		if (num_received > 10) {
 			if (print_num_received(num_received))
-				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
 					"Received trap %u times consecutively\n",
 					num_received);
 			/*
@@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 			 * we mark it as unhealthy.
 			 */
 			if (physp_change_trap == TRUE) {
-				/* get the port */
-				p_physp = get_physp_by_lid_and_num(sm,
-								   cl_ntoh16
-								   (source_lid),
-								   port_num);
-
-				if (!p_physp)
-					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
-						"ERR 3805: "
-						"Failed to find physical port by lid:%u num:%u\n",
-						cl_ntoh16(source_lid),
-						port_num);
-				else {
-					/* When babbling port policy option is enabled and
-					   Threshold for disabling a "babbling" port is exceeded */
-					if (sm->p_subn->opt.
-					    babbling_port_policy
-					    && num_received >= 250
-					    && disable_port(sm, p_physp) == 0)
-						goto Exit;
-
-					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-						"Marking unhealthy physical port by lid:%u num:%u\n",
-						cl_ntoh16(source_lid),
-						port_num);
-					/* check if the current state of the p_physp is healthy. If
-					   it is - then this is a first change of state. Run a heavy sweep.
-					   if it is not - no need to mark it again - just restart the timer. */
-					if (osm_physp_is_healthy(p_physp)) {
-						osm_physp_set_health(p_physp,
-								     FALSE);
-						/* Make sure we sweep again - force a heavy sweep. */
-						/* The sweep should be done only after the re-registration, or
-						   else we'll be losing track of the timer. */
-						run_heavy_sweep = TRUE;
-					}
-					/* If we are marking the port as unhealthy - we want to
-					   keep this for a longer period of time than the
-					   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
-					   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
-					event_wheel_timeout =
-					    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
-				}
+				int ret = shutup_noisy_port(sm,
+							    cl_ntoh16(source_lid),
+							    port_num,
+							    num_received);
+				if (ret == 1) /* port disabled */
+					goto Exit;
+				else if (ret == 2) /* unhealthy - run sweep */
+					run_heavy_sweep = TRUE;
+				/* in any case increase timeout interval */
+				event_wheel_timeout =
+				    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
 			}
 		}
 
@@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 		if (num_received > 10 && run_heavy_sweep == FALSE) {
 			if (print_num_received(num_received))
 				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-					"Continuously received this trap %u times. Ignoring\n",
-					num_received);
+					"Ignoring noisy traps.\n");
 			goto Exit;
 		}
 	}
-- 
1.6.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2009-11-03  2:00 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-02 11:50 [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request Hal Rosenstock
     [not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
2009-11-03  2:00   ` Sasha Khapyorsky [this message]
2009-11-03 13:50     ` Hal Rosenstock

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091103020028.GB17404@me \
    --to=sashak-smomgflxvozwk0htik3j/w@public.gmane.org \
    --cc=hnrose-Wuw85uim5zDR7s880joybQ@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox