All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
To: Sagi Grimberg
	<sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>,
	Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	oren-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH v1 1/3] IB/srp: Fix crash when unmapping data loop
Date: Thu, 06 Mar 2014 10:10:09 +0100	[thread overview]
Message-ID: <53183B71.4090609@acm.org> (raw)
In-Reply-To: <530F225E.5070500-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

On 02/27/14 12:32, Sagi Grimberg wrote:
> As I recall  (need to re-confirm this), the problem was that in unstable
> ports condition srp_abort is
> called invoking srp_free_req (unmap call #1) and reconnect process (or
> reset_device or terminate_io)
> finishes all requests in the request ring (unmap call #2). when FMRs are
> used then nfmr goes to zero
> the first time, but when FMRs are not supported nfmr goes from 0 to -1
> causing the crash since nfmr condition
> is not safe.

Hello Sagi,

Thinking further about this, it looks to me like the only way to avoid
that srp_terminate_io() triggers a race condition with the error path
in srp_queuecommand() is to set req->scmnd only if srp_post_send() has
succeeded. How about the patch below ? That patch should ensure that
srp_unmap_data() is only called once for each request and hence should
avoid triggering the crash you mentioned.

Thanks,

Bart.

[PATCH] IB/srp: Fix a race condition between fast failing I/O and I/O queueing

Avoid that srp_unmap_data() can get invoked concurrently by
srp_terminate_io() and from the error path in srp_queuecommand()
if srp_post_send() fails.

Signed-off-by: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 46 +++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7858240..85eb59f 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -783,6 +783,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  * srp_claim_req - Take ownership of the scmnd associated with a request.
  * @target: SRP target port.
  * @req: SRP request.
+ * @sdev: If not NULL, only take ownership for this SCSI device.
  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
  *         ownership of @req->scmnd if it equals @scmnd.
  *
@@ -791,16 +792,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  */
 static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
 				       struct srp_request *req,
+				       struct scsi_device *sdev,
 				       struct scsi_cmnd *scmnd)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&target->lock, flags);
-	if (!scmnd) {
+	if (req->scmnd &&
+	    (!sdev || req->scmnd->device == sdev) &&
+	    (!scmnd || req->scmnd == scmnd)) {
 		scmnd = req->scmnd;
 		req->scmnd = NULL;
-	} else if (req->scmnd == scmnd) {
-		req->scmnd = NULL;
 	} else {
 		scmnd = NULL;
 	}
@@ -827,9 +829,10 @@ static void srp_free_req(struct srp_target_port *target,
 }
 
 static void srp_finish_req(struct srp_target_port *target,
-			   struct srp_request *req, int result)
+			   struct srp_request *req, struct scsi_device *sdev,
+			   int result)
 {
-	struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
+	struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
 
 	if (scmnd) {
 		srp_free_req(target, req, scmnd, 0);
@@ -845,7 +848,7 @@ static void srp_terminate_io(struct srp_rport *rport)
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
-		srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
+		srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
 	}
 }
 
@@ -882,7 +885,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
-		srp_finish_req(target, req, DID_RESET << 16);
+		srp_finish_req(target, req, NULL, DID_RESET << 16);
 	}
 
 	INIT_LIST_HEAD(&target->free_tx);
@@ -1290,7 +1293,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
 		complete(&target->tsk_mgmt_done);
 	} else {
 		req = &target->req_ring[rsp->tag];
-		scmnd = srp_claim_req(target, req, NULL);
+		scmnd = srp_claim_req(target, req, NULL, NULL);
 		if (!scmnd) {
 			shost_printk(KERN_ERR, target->scsi_host,
 				     "Null scmnd for RSP w/tag %016llx\n",
@@ -1509,7 +1512,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
-	int len, result;
+	int len, result, ret = 0;
 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
 
 	/*
@@ -1552,8 +1555,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	cmd->tag    = req->index;
 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
 
-	req->scmnd    = scmnd;
-	req->cmd      = iu;
+	req->cmd = iu;
 
 	len = srp_map_data(scmnd, target, req);
 	if (len < 0) {
@@ -1565,7 +1567,14 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
 				      DMA_TO_DEVICE);
 
-	if (srp_post_send(target, iu, len)) {
+	spin_lock_irqsave(&target->lock, flags);
+	if (srp_post_send(target, iu, len) == 0)
+		req->scmnd = scmnd;
+	else
+		ret = SCSI_MLQUEUE_HOST_BUSY;
+	spin_unlock_irqrestore(&target->lock, flags);
+
+	if (ret) {
 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
 		goto err_unmap;
 	}
@@ -1574,7 +1583,7 @@ unlock_rport:
 	if (in_scsi_eh)
 		mutex_unlock(&rport->mutex);
 
-	return 0;
+	return ret;
 
 err_unmap:
 	srp_unmap_data(scmnd, target, req);
@@ -1588,10 +1597,8 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
-	if (in_scsi_eh)
-		mutex_unlock(&rport->mutex);
-
-	return SCSI_MLQUEUE_HOST_BUSY;
+	ret = SCSI_MLQUEUE_HOST_BUSY;
+	goto unlock_rport;
 }
 
 /*
@@ -2008,7 +2015,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 
 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
 
-	if (!req || !srp_claim_req(target, req, scmnd))
+	if (!req || !srp_claim_req(target, req, NULL, scmnd))
 		return SUCCESS;
 	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
 			      SRP_TSK_ABORT_TASK) == 0)
@@ -2039,8 +2046,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
-		if (req->scmnd && req->scmnd->device == scmnd->device)
-			srp_finish_req(target, req, DID_RESET << 16);
+		srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
 	}
 
 	return SUCCESS;
-- 
1.8.4.5



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2014-03-06  9:10 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-24 14:30 [PATCH v1 0/3] SRP initiator fixes for kernel 3.15 Sagi Grimberg
     [not found] ` <1393252218-30638-1-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-24 14:30   ` [PATCH v1 1/3] IB/srp: Fix crash when unmapping data loop Sagi Grimberg
     [not found]     ` <1393252218-30638-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-24 15:11       ` Sebastian Riemer
2014-02-24 15:24       ` Bart Van Assche
     [not found]         ` <530B6444.1000805-HInyCGIudOg@public.gmane.org>
2014-02-27 11:32           ` Sagi Grimberg
     [not found]             ` <530F225E.5070500-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-06  9:10               ` Bart Van Assche [this message]
     [not found]                 ` <53183B71.4090609-HInyCGIudOg@public.gmane.org>
2014-03-06 15:32                   ` sagi grimberg
     [not found]                     ` <53189526.2010704-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-03-06 16:10                       ` Sagi Grimberg
     [not found]                         ` <53189DDE.7090003-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-07  8:13                           ` Bart Van Assche
     [not found]                             ` <53197FC5.4090102-HInyCGIudOg@public.gmane.org>
2014-03-11 13:38                               ` Sagi Grimberg
     [not found]                                 ` <531F11B8.5030202-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-11 13:51                                   ` Bart Van Assche
     [not found]                                     ` <531F14D0.7010608-HInyCGIudOg@public.gmane.org>
2014-03-11 14:07                                       ` Sagi Grimberg
     [not found]                                         ` <531F18A7.1000907-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-11 14:29                                           ` Bart Van Assche
     [not found]                                             ` <531F1DBD.9070505-HInyCGIudOg@public.gmane.org>
2014-03-11 14:48                                               ` Sagi Grimberg
     [not found]                                                 ` <531F2221.2090403-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-11 15:00                                                   ` Bart Van Assche
     [not found]                                                     ` <531F2501.9090005-HInyCGIudOg@public.gmane.org>
2014-03-11 15:30                                                       ` Sagi Grimberg
     [not found]                                                         ` <531F2C2C.7080306-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-03-12 13:16                                                           ` Bart Van Assche
     [not found]                                                             ` <53205E1E.3070304-HInyCGIudOg@public.gmane.org>
2014-03-13  7:42                                                               ` Sagi Grimberg
2014-02-24 14:30   ` [PATCH v1 2/3] IB/srp: Check ib_query_gid return value Sagi Grimberg
     [not found]     ` <1393252218-30638-3-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-24 15:34       ` Bart Van Assche
2014-02-24 14:30   ` [PATCH v1 3/3] IB/srp: Protect free_tx iu list from concurrent flows Sagi Grimberg
     [not found]     ` <1393252218-30638-4-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-24 15:38       ` Bart Van Assche
     [not found]         ` <530B677A.1040508-HInyCGIudOg@public.gmane.org>
2014-02-27 11:51           ` Sagi Grimberg
     [not found]             ` <530F26CE.5070404-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-02-27 14:22               ` Bart Van Assche

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53183B71.4090609@acm.org \
    --to=bvanassche-hinycgiudog@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=oren-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org \
    --cc=sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.