All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Tyrel Datwyler <tyreld@linux.ibm.com>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	Sasha Levin <sashal@kernel.org>,
	linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
Subject: [PATCH AUTOSEL 5.4 15/24] scsi: ibmvscsi: Fix potential race after loss of transport
Date: Mon,  2 Nov 2020 20:19:58 -0500	[thread overview]
Message-ID: <20201103012007.183429-15-sashal@kernel.org> (raw)
In-Reply-To: <20201103012007.183429-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ]

After a loss of transport due to an adapter migration or crash/disconnect
from the host partner there is a tiny window where we can race adjusting
the request_limit of the adapter. The request limit is atomically
increased/decreased to track the number of inflight requests against the
allowed limit of our VIOS partner.

After a transport loss we set the request_limit to zero to reflect this
state.  However, there is a window where the adapter may attempt to queue a
command because the transport loss event hasn't been fully processed yet
and request_limit is still greater than zero.  The hypercall to send the
event will fail and the error path will increment the request_limit as a
result.  If the adapter processes the transport event prior to this
increment the request_limit becomes out of sync with the adapter state and
can result in SCSI commands being submitted on the now reset connection
prior to an SRP Login resulting in a protocol violation.

Fix this race by protecting request_limit with the host lock when changing
the value via atomic_set() to indicate no transport.

Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 36 +++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index c5711c659b517..1ab0a61e3fb59 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 }
 
+/**
+ * ibmvscsi_set_request_limit - Set the adapter request_limit in response to
+ * an adapter failure, reset, or SRP Login. Done under host lock to prevent
+ * race with SCSI command submission.
+ * @hostdata:	adapter to adjust
+ * @limit:	new request limit
+ */
+static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	atomic_set(&hostdata->request_limit, limit);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+}
+
 /**
  * ibmvscsi_reset_host - Reset the connection to the server
  * @hostdata:	struct ibmvscsi_host_data to reset
@@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 {
 	scsi_block_requests(hostdata->host);
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
 	purge_requests(hostdata, DID_ERROR);
 	hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
 			 evt_struct->xfer_iu->srp.login_rej.reason);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	default:
 		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
 			evt_struct->xfer_iu->srp.login_rsp.opcode);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	}
 
@@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
 	 */
-	atomic_set(&hostdata->request_limit,
+	ibmvscsi_set_request_limit(hostdata,
 		   be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
 	/* If we had any pending I/Os, kick them */
@@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					 SRP_BUF_FORMAT_INDIRECT);
 
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
 	 * get sent by the driver regardless of request_limit.
 	 */
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
@@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case VIOSRP_CRQ_XPORT_EVENT:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
-		atomic_set(&hostdata->request_limit, 0);
+		ibmvscsi_set_request_limit(hostdata, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
@@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 	}
 
 	hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	if (rc) {
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		dev_err(hostdata->dev, "error after %s\n", action);
 	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	scsi_unblock_requests(hostdata->host);
 }
@@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
-	atomic_set(&hostdata->request_limit, -1);
+	ibmvscsi_set_request_limit(hostdata, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	if (map_persist_bufs(hostdata)) {
-- 
2.27.0


WARNING: multiple messages have this Message-ID (diff)
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Sasha Levin <sashal@kernel.org>,
	Tyrel Datwyler <tyreld@linux.ibm.com>,
	linuxppc-dev@lists.ozlabs.org, linux-scsi@vger.kernel.org,
	"Martin K . Petersen" <martin.petersen@oracle.com>
Subject: [PATCH AUTOSEL 5.4 15/24] scsi: ibmvscsi: Fix potential race after loss of transport
Date: Mon,  2 Nov 2020 20:19:58 -0500	[thread overview]
Message-ID: <20201103012007.183429-15-sashal@kernel.org> (raw)
In-Reply-To: <20201103012007.183429-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ]

After a loss of transport due to an adapter migration or crash/disconnect
from the host partner there is a tiny window where we can race adjusting
the request_limit of the adapter. The request limit is atomically
increased/decreased to track the number of inflight requests against the
allowed limit of our VIOS partner.

After a transport loss we set the request_limit to zero to reflect this
state.  However, there is a window where the adapter may attempt to queue a
command because the transport loss event hasn't been fully processed yet
and request_limit is still greater than zero.  The hypercall to send the
event will fail and the error path will increment the request_limit as a
result.  If the adapter processes the transport event prior to this
increment the request_limit becomes out of sync with the adapter state and
can result in SCSI commands being submitted on the now reset connection
prior to an SRP Login resulting in a protocol violation.

Fix this race by protecting request_limit with the host lock when changing
the value via atomic_set() to indicate no transport.

Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 36 +++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index c5711c659b517..1ab0a61e3fb59 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 }
 
+/**
+ * ibmvscsi_set_request_limit - Set the adapter request_limit in response to
+ * an adapter failure, reset, or SRP Login. Done under host lock to prevent
+ * race with SCSI command submission.
+ * @hostdata:	adapter to adjust
+ * @limit:	new request limit
+ */
+static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	atomic_set(&hostdata->request_limit, limit);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+}
+
 /**
  * ibmvscsi_reset_host - Reset the connection to the server
  * @hostdata:	struct ibmvscsi_host_data to reset
@@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 {
 	scsi_block_requests(hostdata->host);
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
 	purge_requests(hostdata, DID_ERROR);
 	hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
 			 evt_struct->xfer_iu->srp.login_rej.reason);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	default:
 		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
 			evt_struct->xfer_iu->srp.login_rsp.opcode);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	}
 
@@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
 	 */
-	atomic_set(&hostdata->request_limit,
+	ibmvscsi_set_request_limit(hostdata,
 		   be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
 	/* If we had any pending I/Os, kick them */
@@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					 SRP_BUF_FORMAT_INDIRECT);
 
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
 	 * get sent by the driver regardless of request_limit.
 	 */
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
@@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case VIOSRP_CRQ_XPORT_EVENT:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
-		atomic_set(&hostdata->request_limit, 0);
+		ibmvscsi_set_request_limit(hostdata, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
@@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 	}
 
 	hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	if (rc) {
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		dev_err(hostdata->dev, "error after %s\n", action);
 	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	scsi_unblock_requests(hostdata->host);
 }
@@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
-	atomic_set(&hostdata->request_limit, -1);
+	ibmvscsi_set_request_limit(hostdata, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	if (map_persist_bufs(hostdata)) {
-- 
2.27.0


  parent reply	other threads:[~2020-11-03  1:24 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-03  1:19 [PATCH AUTOSEL 5.4 01/24] ARM: dts: sun4i-a10: fix cpu_alert temperature Sasha Levin
2020-11-03  1:19 ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 02/24] arm64: dts: meson-axg: add USB nodes Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  8:55   ` Neil Armstrong
2020-11-03  8:55     ` Neil Armstrong
2020-11-03  8:55     ` Neil Armstrong
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 03/24] arm64: dts: meson-axg-s400: enable USB OTG Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  8:55   ` Neil Armstrong
2020-11-03  8:55     ` Neil Armstrong
2020-11-03  8:55     ` Neil Armstrong
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 04/24] arm64: dts: meson: add missing g12 rng clock Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 05/24] arm64: dts: amlogic: meson-g12: use the G12A specific dwmac compatible Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  5:53   ` Martin Blumenstingl
2020-11-03  5:53     ` Martin Blumenstingl
2020-11-03  5:53     ` Martin Blumenstingl
2020-11-08 13:21     ` Sasha Levin
2020-11-08 13:21       ` Sasha Levin
2020-11-08 13:21       ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 06/24] x86/kexec: Use up-to-dated screen_info copy to fill boot params Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 07/24] of: Fix reserved-memory overlap detection Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 08/24] drm/sun4i: frontend: Rework a bit the phase data Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 09/24] drm/sun4i: frontend: Reuse the ch0 phase for RGB formats Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 10/24] drm/sun4i: frontend: Fix the scaler phase on A33 Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 11/24] blk-cgroup: Fix memleak on error path Sasha Levin
     [not found] ` <20201103012007.183429-1-sashal-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2020-11-03  1:19   ` [PATCH AUTOSEL 5.4 12/24] blk-cgroup: Pre-allocate tree node on blkg_conf_prep Sasha Levin
2020-11-03  1:19     ` Sasha Levin
2020-11-03  1:20   ` [PATCH AUTOSEL 5.4 22/24] drm/nouveau/nouveau: fix the start/end range for migration Sasha Levin
2020-11-03  1:20     ` Sasha Levin
2020-11-03  1:20     ` Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 13/24] scsi: core: Don't start concurrent async scan on same host Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 14/24] drm/amdgpu: add DID for navi10 blockchain SKU Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19   ` Sasha Levin
2020-11-03  1:19 ` Sasha Levin [this message]
2020-11-03  1:19   ` [PATCH AUTOSEL 5.4 15/24] scsi: ibmvscsi: Fix potential race after loss of transport Sasha Levin
2020-11-03  1:19 ` [PATCH AUTOSEL 5.4 16/24] vsock: use ns_capable_noaudit() on socket create Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 17/24] nvme-rdma: handle unexpected nvme completion data length Sasha Levin
2020-11-03  1:20   ` Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 18/24] nvmet: fix a NULL pointer dereference when tracing the flush command Sasha Levin
2020-11-03  1:20   ` Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 19/24] drm/vc4: drv: Add error handding for bind Sasha Levin
2020-11-03  1:20   ` Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 20/24] ACPI: NFIT: Fix comparison to '-ENXIO' Sasha Levin
2020-11-03  1:20   ` Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 21/24] usb: cdns3: gadget: suspicious implicit sign extension Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 23/24] drm/nouveau/gem: fix "refcount_t: underflow; use-after-free" Sasha Levin
2020-11-03  1:20   ` Sasha Levin
2020-11-03  1:20 ` [PATCH AUTOSEL 5.4 24/24] arm64/smp: Move rcu_cpu_starting() earlier Sasha Levin
2020-11-03  1:20   ` Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201103012007.183429-15-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=martin.petersen@oracle.com \
    --cc=stable@vger.kernel.org \
    --cc=tyreld@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.