public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: Chris Leech <cleech-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
To: linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	open-iscsi-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org,
	lduncan-IBi9RG/b67k@public.gmane.org,
	martin petersen
	<martin.petersen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>,
	jejb-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org,
	gpiccoli-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org,
	ilkka-/1wQRMveznE@public.gmane.org
Subject: Re: [PATCH] libiscsi: add lock around task lists to fix list corruption regression
Date: Thu, 23 Feb 2017 17:25:48 -0500 (EST)	[thread overview]
Message-ID: <250619225.63072551.1487888748696.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <20170221173536.32024-1-cleech-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Yikes, my git-send-email settings suppressed the important CCs.  Sorry!

Guilherme and Ilkka, can you comment about your testing results or review please?

- Chris Leech

----- Original Message -----
> There's a rather long standing regression from commit
> 659743b [SCSI] libiscsi: Reduce locking contention in fast path
> 
> Depending on iSCSI target behavior, it's possible to hit the case in
> iscsi_complete_task where the task is still on a pending list
> (!list_empty(&task->running)).  When that happens the task is removed
> from the list while holding the session back_lock, but other task list
> modification occur under the frwd_lock.  That leads to linked list
> corruption and eventually a panicked system.
> 
> Rather than back out the session lock split entirely, in order to try
> and keep some of the performance gains this patch adds another lock to
> maintain the task lists integrity.
> 
> Major enterprise supported kernels have been backing out the lock split
> for while now, thanks to the efforts at IBM where a lab setup has the
> most reliable reproducer I've seen on this issue.  This patch has been
> tested there successfully.
> 
> Signed-off-by: Chris Leech <cleech-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> ---
>  drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++-
>  include/scsi/libiscsi.h |  1 +
>  2 files changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 834d121..acb5ef3 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task,
> int state)
>  	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
>  	task->state = state;
>  
> -	if (!list_empty(&task->running))
> +	spin_lock_bh(&conn->taskqueuelock);
> +	if (!list_empty(&task->running)) {
> +		WARN_ONCE(1, "iscsi_complete_task while task on list");
>  		list_del_init(&task->running);
> +	}
> +	spin_unlock_bh(&conn->taskqueuelock);
>  
>  	if (conn->task == task)
>  		conn->task = NULL;
> @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct
> iscsi_hdr *hdr,
>  		if (session->tt->xmit_task(task))
>  			goto free_task;
>  	} else {
> +		spin_lock_bh(&conn->taskqueuelock);
>  		list_add_tail(&task->running, &conn->mgmtqueue);
> +		spin_unlock_bh(&conn->taskqueuelock);
>  		iscsi_conn_queue_work(conn);
>  	}
>  
> @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
>  	 * this may be on the requeue list already if the xmit_task callout
>  	 * is handling the r2ts while we are adding new ones
>  	 */
> +	spin_lock_bh(&conn->taskqueuelock);
>  	if (list_empty(&task->running))
>  		list_add_tail(&task->running, &conn->requeue);
> +	spin_unlock_bh(&conn->taskqueuelock);
>  	iscsi_conn_queue_work(conn);
>  }
>  EXPORT_SYMBOL_GPL(iscsi_requeue_task);
> @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  	 * only have one nop-out as a ping from us and targets should not
>  	 * overflow us with nop-ins
>  	 */
> +	spin_lock_bh(&conn->taskqueuelock);
>  check_mgmt:
>  	while (!list_empty(&conn->mgmtqueue)) {
>  		conn->task = list_entry(conn->mgmtqueue.next,
>  					 struct iscsi_task, running);
>  		list_del_init(&conn->task->running);
> +		spin_unlock_bh(&conn->taskqueuelock);
>  		if (iscsi_prep_mgmt_task(conn, conn->task)) {
>  			/* regular RX path uses back_lock */
>  			spin_lock_bh(&conn->session->back_lock);
>  			__iscsi_put_task(conn->task);
>  			spin_unlock_bh(&conn->session->back_lock);
>  			conn->task = NULL;
> +			spin_lock_bh(&conn->taskqueuelock);
>  			continue;
>  		}
>  		rc = iscsi_xmit_task(conn);
>  		if (rc)
>  			goto done;
> +		spin_lock_bh(&conn->taskqueuelock);
>  	}
>  
>  	/* process pending command queue */
> @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  		conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
>  					running);
>  		list_del_init(&conn->task->running);
> +		spin_unlock_bh(&conn->taskqueuelock);
>  		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
>  			fail_scsi_task(conn->task, DID_IMM_RETRY);
> +			spin_lock_bh(&conn->taskqueuelock);
>  			continue;
>  		}
>  		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
>  		if (rc) {
>  			if (rc == -ENOMEM || rc == -EACCES) {
> +				spin_lock_bh(&conn->taskqueuelock);
>  				list_add_tail(&conn->task->running,
>  					      &conn->cmdqueue);
>  				conn->task = NULL;
> +				spin_unlock_bh(&conn->taskqueuelock);
>  				goto done;
>  			} else
>  				fail_scsi_task(conn->task, DID_ABORT);
> +			spin_lock_bh(&conn->taskqueuelock);
>  			continue;
>  		}
>  		rc = iscsi_xmit_task(conn);
> @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  		 * we need to check the mgmt queue for nops that need to
>  		 * be sent to aviod starvation
>  		 */
> +		spin_lock_bh(&conn->taskqueuelock);
>  		if (!list_empty(&conn->mgmtqueue))
>  			goto check_mgmt;
>  	}
> @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  		conn->task = task;
>  		list_del_init(&conn->task->running);
>  		conn->task->state = ISCSI_TASK_RUNNING;
> +		spin_unlock_bh(&conn->taskqueuelock);
>  		rc = iscsi_xmit_task(conn);
>  		if (rc)
>  			goto done;
> +		spin_lock_bh(&conn->taskqueuelock);
>  		if (!list_empty(&conn->mgmtqueue))
>  			goto check_mgmt;
>  	}
> +	spin_unlock_bh(&conn->taskqueuelock);
>  	spin_unlock_bh(&conn->session->frwd_lock);
>  	return -ENODATA;
>  
> @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct
> scsi_cmnd *sc)
>  			goto prepd_reject;
>  		}
>  	} else {
> +		spin_lock_bh(&conn->taskqueuelock);
>  		list_add_tail(&task->running, &conn->cmdqueue);
> +		spin_unlock_bh(&conn->taskqueuelock);
>  		iscsi_conn_queue_work(conn);
>  	}
>  
> @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session,
> int dd_size,
>  	INIT_LIST_HEAD(&conn->mgmtqueue);
>  	INIT_LIST_HEAD(&conn->cmdqueue);
>  	INIT_LIST_HEAD(&conn->requeue);
> +	spin_lock_init(&conn->taskqueuelock);
>  	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
>  
>  	/* allocate login_task used for the login/text sequences */
> diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
> index b0e275d..583875e 100644
> --- a/include/scsi/libiscsi.h
> +++ b/include/scsi/libiscsi.h
> @@ -196,6 +196,7 @@ struct iscsi_conn {
>  	struct iscsi_task	*task;		/* xmit task in progress */
>  
>  	/* xmit */
> +	spinlock_t		taskqueuelock;  /* protects the next three lists */
>  	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
>  	struct list_head	cmdqueue;	/* data-path cmd queue */
>  	struct list_head	requeue;	/* tasks needing another run */
> --
> 2.9.3
> 
> 

-- 
You received this message because you are subscribed to the Google Groups "open-iscsi" group.
To unsubscribe from this group and stop receiving emails from it, send an email to open-iscsi+unsubscribe-/JYPxA39Uh5TLH3MbocFF+G/Ez6ZCGd0@public.gmane.org
To post to this group, send email to open-iscsi-/JYPxA39Uh5TLH3MbocFF+G/Ez6ZCGd0@public.gmane.org
Visit this group at https://groups.google.com/group/open-iscsi.
For more options, visit https://groups.google.com/d/optout.

  parent reply	other threads:[~2017-02-23 22:25 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-21 17:35 [PATCH] libiscsi: add lock around task lists to fix list corruption regression Chris Leech
     [not found] ` <20170221173536.32024-1-cleech-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-23 22:11   ` Martin K. Petersen
2017-02-23 22:25   ` Chris Leech [this message]
2017-02-24 15:09     ` Guilherme G. Piccoli
     [not found]       ` <4304a9c0-6427-c0a2-e18a-51292d62022b-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2017-02-24 19:48         ` Ilkka Sovanto
     [not found]     ` <250619225.63072551.1487888748696.JavaMail.zimbra-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-03-05 20:49       ` Ilkka Sovanto
     [not found]         ` <CAKYn0tiyWNEjFZHaPDHNjam5=3WCR_V2svNiJmvWeYhpv3=8Ag-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-05-09 22:27           ` Ilkka Sovanto
     [not found]             ` <CAKYn0tjF46gxwj3MtjLL_ic+veXjTZ9n_BGKjz6-fW+Cse2w9w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-12-18 21:07               ` Alan Adamson
2017-02-28  0:58   ` [PATCH v2] " Chris Leech
     [not found]     ` <20170228005836.12444-1-cleech-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-28 16:46       ` Guilherme G. Piccoli
2017-03-01  3:06       ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=250619225.63072551.1487888748696.JavaMail.zimbra@redhat.com \
    --to=cleech-h+wxahxf7alqt0dzr+alfa@public.gmane.org \
    --cc=gpiccoli-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org \
    --cc=ilkka-/1wQRMveznE@public.gmane.org \
    --cc=jejb-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org \
    --cc=lduncan-IBi9RG/b67k@public.gmane.org \
    --cc=linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=martin.petersen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org \
    --cc=open-iscsi-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox