All of lore.kernel.org
 help / color / mirror / Atom feed
From: Line Holen <line.holen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Cc: "linux-rdma
	(linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org)"
	<linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	Daniel Klein <danielk-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: Re: [PATCH opensm] osm_sm_state_mgr.c: Fix race condition during sm_state_mgr_send_master_sm_info_req
Date: Thu, 05 Dec 2013 11:20:39 +0100	[thread overview]
Message-ID: <52A05377.5070606@oracle.com> (raw)
In-Reply-To: <529DDBB2.80309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

Acked-by: Line Holen<Line.Holen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

On 12/03/13 14:25, Hal Rosenstock wrote:
> From: Daniel Klein<danielk-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>
> When OpenSM changes SM state from standby to master, it sets m->p_polling_sm
> to NULL.
>
> When this occurs while calling sm_state_mgr_send_master_sm_info_req, it can
> result in a segmentation fault.
>
> Signed-off-by: Daniel Klein<danielk-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Signed-off-by: Hal Rosenstock<hal-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
> diff --git a/opensm/osm_sm_state_mgr.c b/opensm/osm_sm_state_mgr.c
> index 596ad8f..770c5f9 100644
> --- a/opensm/osm_sm_state_mgr.c
> +++ b/opensm/osm_sm_state_mgr.c
> @@ -74,7 +74,7 @@ void osm_report_sm_state(osm_sm_t * sm)
>   	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, buf);
>   }
>
> -static void sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm)
> +static void sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm, uint8_t sm_state)
>   {
>   	osm_madw_context_t context;
>   	const osm_port_t *p_port;
> @@ -85,8 +85,7 @@ static void sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm)
>   	OSM_LOG_ENTER(sm->p_log);
>
>   	memset(&context, 0, sizeof(context));
> -	CL_PLOCK_ACQUIRE(sm->p_lock);
> -	if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) {
> +	if (sm_state == IB_SMINFO_STATE_STANDBY) {
>   		/*
>   		 * We are in STANDBY state - this means we need to poll the
>   		 * master SM (according to master_guid).
> @@ -104,13 +103,19 @@ static void sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm)
>   		guid = sm->p_polling_sm->smi.guid;
>   	}
>
> +	/* Verify that SM is not polling itself */
> +	if (guid == sm->p_subn->sm_port_guid) {
> +		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> +			"OpenSM doesn't poll itself\n");
> +		goto Exit;
> +	}
> +
>   	p_port = osm_get_port_by_guid(sm->p_subn, guid);
>
>   	if (p_port == NULL) {
>   		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3203: "
>   			"No port object for GUID 0x%016" PRIx64 "\n",
>   			cl_ntoh64(guid));
> -		CL_PLOCK_RELEASE(sm->p_lock);
>   		goto Exit;
>   	}
>
> @@ -122,7 +127,6 @@ static void sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm)
>   			     IB_MAD_ATTR_SM_INFO, 0, FALSE,
>   			     ib_port_info_get_m_key(&p_port->p_physp->port_info),
>   			     CL_DISP_MSGID_NONE,&context);
> -	CL_PLOCK_RELEASE(sm->p_lock);
>
>   	if (status != IB_SUCCESS)
>   		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3204: "
> @@ -135,7 +139,7 @@ Exit:
>
>   static void sm_state_mgr_start_polling(osm_sm_t * sm)
>   {
> -	uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout;
> +	uint32_t timeout;
>   	cl_status_t cl_status;
>
>   	OSM_LOG_ENTER(sm->p_log);
> @@ -148,7 +152,10 @@ static void sm_state_mgr_start_polling(osm_sm_t * sm)
>   	/*
>   	 * Send a SubnGet(SMInfo) query to the current (or new) master found.
>   	 */
> -	sm_state_mgr_send_master_sm_info_req(sm);
> +	CL_PLOCK_ACQUIRE(sm->p_lock);
> +	timeout = sm->p_subn->opt.sminfo_polling_timeout;
> +	sm_state_mgr_send_master_sm_info_req(sm, sm->p_subn->sm_state);
> +	CL_PLOCK_RELEASE(sm->p_lock);
>
>   	/*
>   	 * Start a timer that will wake up every sminfo_polling_timeout milliseconds.
> @@ -166,21 +173,31 @@ static void sm_state_mgr_start_polling(osm_sm_t * sm)
>   void osm_sm_state_mgr_polling_callback(IN void *context)
>   {
>   	osm_sm_t *sm = context;
> -	uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout;
> +	uint32_t timeout;
>   	cl_status_t cl_status;
> +	uint8_t sm_state;
>
>   	OSM_LOG_ENTER(sm->p_log);
>
> +	cl_spinlock_acquire(&sm->state_lock);
> +	sm_state = sm->p_subn->sm_state;
> +	cl_spinlock_release(&sm->state_lock);
> +
> +	CL_PLOCK_ACQUIRE(sm->p_lock);
> +	timeout = sm->p_subn->opt.sminfo_polling_timeout;
> +
>   	/*
>   	 * We can be here in one of two cases:
>   	 * 1. We are a STANDBY sm polling on the master SM.
>   	 * 2. We are a MASTER sm, waiting for a handover from a remote master sm.
>   	 * If we are not in one of these cases - don't need to restart the poller.
>   	 */
> -	if (!((sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER&&
> +	if (!((sm_state == IB_SMINFO_STATE_MASTER&&
>   	sm->p_polling_sm != NULL) ||
> -	      sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY))
> +	      sm_state == IB_SMINFO_STATE_STANDBY)) {
> +		CL_PLOCK_RELEASE(sm->p_lock);
>   		goto Exit;
> +	}
>
>   	/*
>   	 * If we are a STANDBY sm and the osm_exit_flag is set, then let's
> @@ -189,7 +206,8 @@ void osm_sm_state_mgr_polling_callback(IN void *context)
>   	 * received. In other cases - it is not relevant whether or not the
>   	 * signal is on - since we are currently in exit flow
>   	 */
> -	if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY&&  osm_exit_flag) {
> +	if (sm_state == IB_SMINFO_STATE_STANDBY&&  osm_exit_flag) {
> +		CL_PLOCK_RELEASE(sm->p_lock);
>   		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>   			"Signalling subnet_up_event\n");
>   		cl_event_signal(&sm->subnet_up_event);
> @@ -207,6 +225,7 @@ void osm_sm_state_mgr_polling_callback(IN void *context)
>   		sm->retry_number);
>
>   	if (sm->retry_number>= sm->p_subn->opt.polling_retry_number) {
> +		CL_PLOCK_RELEASE(sm->p_lock);
>   		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
>   			"Reached polling_retry_number value in retry_number. "
>   			"Go to DISCOVERY state\n");
> @@ -215,7 +234,9 @@ void osm_sm_state_mgr_polling_callback(IN void *context)
>   	}
>
>   	/* Send a SubnGet(SMInfo) request to the remote sm (depends on our state) */
> -	sm_state_mgr_send_master_sm_info_req(sm);
> +	sm_state_mgr_send_master_sm_info_req(sm, sm_state);
> +
> +	CL_PLOCK_RELEASE(sm->p_lock);
>
>   	/* restart the timer */
>   	cl_status = cl_timer_start(&sm->polling_timer, timeout);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

      parent reply	other threads:[~2013-12-05 10:20 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-03 13:25 [PATCH opensm] osm_sm_state_mgr.c: Fix race condition during sm_state_mgr_send_master_sm_info_req Hal Rosenstock
     [not found] ` <529DDBB2.80309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-12-05 10:20   ` Line Holen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52A05377.5070606@oracle.com \
    --to=line.holen-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=danielk-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.