From: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
To: Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: David Dillow <dillowda-1Heg1YXhbW8@public.gmane.org>,
Vu Pham <vuhuong-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
Sebastian Riemer
<sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>,
linux-rdma <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: [PATCH 09/14] IB/srp: Use SRP transport layer error recovery
Date: Wed, 12 Jun 2013 15:30:30 +0200 [thread overview]
Message-ID: <51B877F6.6050000@acm.org> (raw)
In-Reply-To: <51B87501.4070005-HInyCGIudOg@public.gmane.org>
Enable fast_io_fail_tmo and dev_loss_tmo functionality for the IB
SRP initiator.
Signed-off-by: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
Cc: David Dillow <dillowda-1Heg1YXhbW8@public.gmane.org>
Cc: Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vu Pham <vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: Sebastian Riemer <sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
---
drivers/infiniband/ulp/srp/ib_srp.c | 123 +++++++++++++++++++++++++----------
drivers/infiniband/ulp/srp/ib_srp.h | 1 -
2 files changed, 88 insertions(+), 36 deletions(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index ffda0ca..6f3e0e5 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -86,6 +86,31 @@ module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
+static int srp_reconnect_delay = 10;
+module_param_named(reconnect_delay, srp_reconnect_delay, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
+static struct kernel_param_ops srp_tmo_ops;
+
+static int srp_fast_io_fail_tmo = 15;
+module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_io_fail_tmo,
+ "Number of seconds between the observation of a transport"
+ " layer error and failing all I/O. \"off\" means that this"
+ " functionality is disabled.");
+
+static int srp_dev_loss_tmo = 600;
+module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dev_loss_tmo,
+ "Maximum number of seconds that the SRP transport should"
+ " insulate transport layer errors. After this time has been"
+ " exceeded the SCSI target is removed. Should be"
+ " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+ " if fast_io_fail_tmo has not been set. \"off\" means that"
+ " this functionality is disabled.");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +127,44 @@ static struct ib_client srp_client = {
static struct ib_sa_client srp_sa_client;
+static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
+{
+ int tmo = *(int *)kp->arg;
+
+ if (tmo >= 0)
+ return sprintf(buffer, "%d", tmo);
+ else
+ return sprintf(buffer, "off");
+}
+
+static int srp_tmo_set(const char *val, const struct kernel_param *kp)
+{
+ int tmo, res;
+
+ if (strcmp(val, "off") != 0) {
+ res = kstrtoint(val, 0, &tmo);
+ if (res)
+ goto out;
+ } else {
+ tmo = -1;
+ }
+ if (kp->arg == &srp_fast_io_fail_tmo)
+ res = srp_tmo_valid(tmo, srp_dev_loss_tmo);
+ else
+ res = srp_tmo_valid(srp_fast_io_fail_tmo, tmo);
+ if (res)
+ goto out;
+ *(int *)kp->arg = tmo;
+
+out:
+ return res;
+}
+
+static struct kernel_param_ops srp_tmo_ops = {
+ .get = srp_tmo_get,
+ .set = srp_tmo_set,
+};
+
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
return (struct srp_target_port *) host->hostdata;
@@ -739,13 +802,20 @@ static void srp_terminate_io(struct srp_rport *rport)
}
}
-static int srp_reconnect_target(struct srp_target_port *target)
+/*
+ * It is up to the caller to ensure that srp_rport_reconnect() calls are
+ * serialized and that no concurrent srp_queuecommand(), srp_abort(),
+ * srp_reset_device() or srp_reset_host() calls will occur while this function
+ * is in progress. One way to realize that is not to call this function
+ * directly but to call srp_reconnect_rport() instead since that last function
+ * serializes calls of this function via rport->mutex and also blocks
+ * srp_queuecommand() calls before invoking this function.
+ */
+static int srp_rport_reconnect(struct srp_rport *rport)
{
- struct Scsi_Host *shost = target->scsi_host;
+ struct srp_target_port *target = rport->lld_data;
int i, ret;
- scsi_target_block(&shost->shost_gendev);
-
srp_disconnect_target(target);
/*
* Now get a new local CM ID so that we avoid confusing the target in
@@ -775,28 +845,9 @@ static int srp_reconnect_target(struct srp_target_port *target)
if (ret == 0)
ret = srp_connect_target(target);
- scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
- SDEV_TRANSPORT_OFFLINE);
- target->transport_offline = !!ret;
-
- if (ret)
- goto err;
-
- shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
-
- return ret;
-
-err:
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "reconnect failed (%d), removing target port.\n", ret);
-
- /*
- * We couldn't reconnect, so kill our target port off.
- * However, we have to defer the real removal because we
- * are in the context of the SCSI error handler now, which
- * will deadlock if we call scsi_remove_host().
- */
- srp_queue_remove_work(target);
+ if (ret == 0)
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "reconnect succeeded\n");
return ret;
}
@@ -1395,10 +1446,11 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
- int len;
+ int len, result;
- if (unlikely(target->transport_offline)) {
- scmnd->result = DID_NO_CONNECT << 16;
+ result = srp_chkready(target->rport);
+ if (unlikely(result)) {
+ scmnd->result = result;
scmnd->scsi_done(scmnd);
return 0;
}
@@ -1793,7 +1845,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
return FAILED;
if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
SRP_TSK_ABORT_TASK) == 0 ||
- target->transport_offline)
+ srp_chkready(target->rport) != 0)
ret = SUCCESS;
else
ret = FAILED;
@@ -1829,14 +1881,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- int ret = FAILED;
shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
- if (!srp_reconnect_target(target))
- ret = SUCCESS;
-
- return ret;
+ return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
static int srp_slave_configure(struct scsi_device *sdev)
@@ -2599,6 +2647,11 @@ static void srp_remove_one(struct ib_device *device)
}
static struct srp_function_template ib_srp_transport_functions = {
+ .has_rport_state = true,
+ .reconnect_delay = &srp_reconnect_delay,
+ .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
+ .dev_loss_tmo = &srp_dev_loss_tmo,
+ .reconnect = srp_rport_reconnect,
.rport_delete = srp_rport_delete,
.terminate_rport_io = srp_terminate_io,
};
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 1817ed5..fda82f7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -140,7 +140,6 @@ struct srp_target_port {
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
- bool transport_offline;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
--
1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2013-06-12 13:30 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-12 13:17 [PATCH 0/14] IB SRP initiator patches for kernel 3.11 Bart Van Assche
2013-06-12 13:28 ` [PATCH 07/14] scsi_transport_srp: Add transport layer error handling Bart Van Assche
[not found] ` <51B8777B.5050201-HInyCGIudOg@public.gmane.org>
2013-06-13 19:43 ` Vu Pham
2013-06-14 13:19 ` Bart Van Assche
[not found] ` <51BB1857.7040802-HInyCGIudOg@public.gmane.org>
2013-06-14 17:59 ` Vu Pham
[not found] ` <51BB5A04.3080901-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-06-15 9:52 ` Bart Van Assche
[not found] ` <51BC3945.9030900-HInyCGIudOg@public.gmane.org>
2013-06-17 6:18 ` Hannes Reinecke
2013-06-17 7:04 ` Bart Van Assche
2013-06-17 7:14 ` Hannes Reinecke
2013-06-17 7:29 ` Bart Van Assche
[not found] ` <51BEBAEA.4080202-HInyCGIudOg@public.gmane.org>
2013-06-17 8:10 ` Hannes Reinecke
2013-06-17 10:13 ` Sebastian Riemer
2013-06-18 16:59 ` Vu Pham
[not found] ` <51C09202.2040503-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-06-19 13:00 ` Bart Van Assche
2013-06-23 21:13 ` Mike Christie
[not found] ` <51C764FB.6070207-hcNo3dDEHLuVc3sceRu5cw@public.gmane.org>
2013-06-24 7:37 ` Bart Van Assche
[not found] ` <51B87501.4070005-HInyCGIudOg@public.gmane.org>
2013-06-12 13:20 ` [PATCH 01/14] IB/srp: Fix remove_one crash due to resource exhaustion Bart Van Assche
[not found] ` <51B875A4.7040903-HInyCGIudOg@public.gmane.org>
2013-06-12 13:38 ` Bart Van Assche
[not found] ` <51B879CF.1080802-HInyCGIudOg@public.gmane.org>
2013-06-12 14:24 ` Sebastian Riemer
2013-06-27 21:01 ` David Dillow
[not found] ` <1372366870.32164.30.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-06-27 23:45 ` Roland Dreier
[not found] ` <CAL1RGDWVgAKSL-GNZCkP1FEt9r_y5QWp+74NzDcga6+tcvWpXw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-06-28 7:41 ` Sebastian Riemer
2013-06-12 13:21 ` [PATCH 02/14] IB/srp: Fix race between srp_queuecommand() and srp_claim_req() Bart Van Assche
[not found] ` <51B875EE.3030702-HInyCGIudOg@public.gmane.org>
2013-06-12 14:58 ` Sebastian Riemer
[not found] ` <51B88C7C.4030209-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
2013-06-12 15:14 ` Bart Van Assche
[not found] ` <51B8903E.3000609-HInyCGIudOg@public.gmane.org>
2013-06-27 21:02 ` David Dillow
[not found] ` <1372366945.32164.32.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-06-28 7:36 ` Bart Van Assche
2013-06-12 13:23 ` [PATCH 03/14] IB/srp: Avoid that srp_reset_host() is skipped after a TL error Bart Van Assche
[not found] ` <51B87638.50102-HInyCGIudOg@public.gmane.org>
2013-06-13 9:30 ` Sebastian Riemer
[not found] ` <51B99120.9000503-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
2013-06-13 9:57 ` Bart Van Assche
2013-06-27 21:03 ` David Dillow
2013-06-12 13:24 ` [PATCH 04/14] IB/srp: Skip host settle delay Bart Van Assche
[not found] ` <51B87689.8030806-HInyCGIudOg@public.gmane.org>
2013-06-13 9:53 ` Sebastian Riemer
[not found] ` <51B996A1.6080604-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
2013-06-13 13:06 ` Or Gerlitz
2013-06-27 21:04 ` David Dillow
2013-06-12 13:25 ` [PATCH 05/14] IB/srp: Maintain a single connection per I_T nexus Bart Van Assche
[not found] ` <51B876BF.4070400-HInyCGIudOg@public.gmane.org>
2013-06-13 13:57 ` Sebastian Riemer
[not found] ` <51B9CFC3.8080008-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
2013-06-13 15:07 ` Bart Van Assche
[not found] ` <51B9E046.3030008-HInyCGIudOg@public.gmane.org>
2013-06-13 15:35 ` Sebastian Riemer
2013-06-13 17:50 ` Vu Pham
[not found] ` <51BA0655.6090707-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-06-13 18:25 ` Bart Van Assche
[not found] ` <51BA0E8F.3030104-HInyCGIudOg@public.gmane.org>
2013-06-13 23:27 ` Vu Pham
[not found] ` <51BA555F.9060807-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-06-14 9:38 ` Sebastian Riemer
[not found] ` <51BAE482.1050304-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
2013-06-14 17:07 ` Vu Pham
[not found] ` <51BB4DBB.4070800-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-06-17 9:41 ` Sebastian Riemer
2013-06-27 21:10 ` David Dillow
[not found] ` <1372367432.32164.36.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-06-28 7:40 ` Bart Van Assche
2013-06-12 13:26 ` [PATCH 06/14] IB/srp: Keep rport as long as the IB transport layer Bart Van Assche
2013-06-12 13:29 ` [PATCH 08/14] IB/srp: Add srp_terminate_io() Bart Van Assche
2013-06-12 13:30 ` Bart Van Assche [this message]
2013-06-12 13:31 ` [PATCH 10/14] IB/srp: Start timers if a transport layer error occurs Bart Van Assche
2013-06-12 13:33 ` [PATCH 11/14] IB/srp: Fail SCSI commands silently Bart Van Assche
2013-06-12 13:35 ` [PATCH 12/14] IB/srp: Make HCA completion vector configurable Bart Van Assche
[not found] ` <51B87904.1070803-HInyCGIudOg@public.gmane.org>
2013-06-27 21:24 ` David Dillow
[not found] ` <1372368256.32164.41.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-06-28 8:18 ` Bart Van Assche
[not found] ` <51CD46F0.60301-HInyCGIudOg@public.gmane.org>
2013-06-28 12:04 ` David Dillow
[not found] ` <1372421041.28740.14.camel-a7a0dvSY7KqLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2013-06-28 12:29 ` Bart Van Assche
2013-06-12 13:36 ` [PATCH 13/14] IB/srp: Make transport layer retry count configurable Bart Van Assche
[not found] ` <51B8794F.6050003-HInyCGIudOg@public.gmane.org>
2013-06-27 21:22 ` David Dillow
[not found] ` <1372368138.32164.40.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-06-28 8:28 ` Bart Van Assche
[not found] ` <51CD4933.5080709-HInyCGIudOg@public.gmane.org>
2013-06-28 12:07 ` David Dillow
[not found] ` <1372421227.28740.17.camel-a7a0dvSY7KqLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2013-06-28 12:30 ` Bart Van Assche
2013-06-12 13:37 ` [PATCH 14/14] IB/srp: Bump driver version and release date Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51B877F6.6050000@acm.org \
--to=bvanassche-hinycgiudog@public.gmane.org \
--cc=dillowda-1Heg1YXhbW8@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
--cc=sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org \
--cc=vuhuong-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox