From: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
To: Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: David Dillow <dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org>,
Vu Pham <vuhuong-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
Sebastian Riemer
<sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>,
linux-rdma <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: [PATCH 4/8] IB/srp: Use SRP transport layer error recovery
Date: Tue, 20 Aug 2013 14:46:01 +0200 [thread overview]
Message-ID: <52136509.3050703@acm.org> (raw)
In-Reply-To: <521363EA.8080906-HInyCGIudOg@public.gmane.org>
Enable reconnect_delay, fast_io_fail_tmo and dev_loss_tmo
functionality for the IB SRP initiator. Add kernel module
parameters that allow specifying default values for these
three parameters.
Signed-off-by: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
Acked-by: David Dillow <dillowda-1Heg1YXhbW8@public.gmane.org>
Cc: Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vu Pham <vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: Sebastian Riemer <sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
---
drivers/infiniband/ulp/srp/ib_srp.c | 129 +++++++++++++++++++++++++----------
drivers/infiniband/ulp/srp/ib_srp.h | 1 -
2 files changed, 94 insertions(+), 36 deletions(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 37dd3fb..a7fa7ed 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -86,6 +86,32 @@ module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
+static struct kernel_param_ops srp_tmo_ops;
+
+static int srp_reconnect_delay = 10;
+module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
+static int srp_fast_io_fail_tmo = 15;
+module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_io_fail_tmo,
+ "Number of seconds between the observation of a transport"
+ " layer error and failing all I/O. \"off\" means that this"
+ " functionality is disabled.");
+
+static int srp_dev_loss_tmo = 600;
+module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dev_loss_tmo,
+ "Maximum number of seconds that the SRP transport should"
+ " insulate transport layer errors. After this time has been"
+ " exceeded the SCSI target is removed. Should be"
+ " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+ " if fast_io_fail_tmo has not been set. \"off\" means that"
+ " this functionality is disabled.");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +128,48 @@ static struct ib_client srp_client = {
static struct ib_sa_client srp_sa_client;
+static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
+{
+ int tmo = *(int *)kp->arg;
+
+ if (tmo >= 0)
+ return sprintf(buffer, "%d", tmo);
+ else
+ return sprintf(buffer, "off");
+}
+
+static int srp_tmo_set(const char *val, const struct kernel_param *kp)
+{
+ int tmo, res;
+
+ if (strncmp(val, "off", 3) != 0) {
+ res = kstrtoint(val, 0, &tmo);
+ if (res)
+ goto out;
+ } else {
+ tmo = -1;
+ }
+ if (kp->arg == &srp_reconnect_delay)
+ res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
+ srp_dev_loss_tmo);
+ else if (kp->arg == &srp_fast_io_fail_tmo)
+ res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
+ else
+ res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
+ tmo);
+ if (res)
+ goto out;
+ *(int *)kp->arg = tmo;
+
+out:
+ return res;
+}
+
+static struct kernel_param_ops srp_tmo_ops = {
+ .get = srp_tmo_get,
+ .set = srp_tmo_set,
+};
+
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
return (struct srp_target_port *) host->hostdata;
@@ -711,13 +779,20 @@ static void srp_terminate_io(struct srp_rport *rport)
}
}
-static int srp_reconnect_target(struct srp_target_port *target)
+/*
+ * It is up to the caller to ensure that srp_rport_reconnect() calls are
+ * serialized and that no concurrent srp_queuecommand(), srp_abort(),
+ * srp_reset_device() or srp_reset_host() calls will occur while this function
+ * is in progress. One way to realize that is not to call this function
+ * directly but to call srp_reconnect_rport() instead since that last function
+ * serializes calls of this function via rport->mutex and also blocks
+ * srp_queuecommand() calls before invoking this function.
+ */
+static int srp_rport_reconnect(struct srp_rport *rport)
{
- struct Scsi_Host *shost = target->scsi_host;
+ struct srp_target_port *target = rport->lld_data;
int i, ret;
- scsi_target_block(&shost->shost_gendev);
-
srp_disconnect_target(target);
/*
* Now get a new local CM ID so that we avoid confusing the target in
@@ -747,28 +822,9 @@ static int srp_reconnect_target(struct srp_target_port *target)
if (ret == 0)
ret = srp_connect_target(target);
- scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
- SDEV_TRANSPORT_OFFLINE);
- target->transport_offline = !!ret;
-
- if (ret)
- goto err;
-
- shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
-
- return ret;
-
-err:
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "reconnect failed (%d), removing target port.\n", ret);
-
- /*
- * We couldn't reconnect, so kill our target port off.
- * However, we have to defer the real removal because we
- * are in the context of the SCSI error handler now, which
- * will deadlock if we call scsi_remove_host().
- */
- srp_queue_remove_work(target);
+ if (ret == 0)
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "reconnect succeeded\n");
return ret;
}
@@ -1367,10 +1423,11 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
- int len;
+ int len, result;
- if (unlikely(target->transport_offline)) {
- scmnd->result = DID_NO_CONNECT << 16;
+ result = srp_chkready(target->rport);
+ if (unlikely(result)) {
+ scmnd->result = result;
scmnd->scsi_done(scmnd);
return 0;
}
@@ -1768,7 +1825,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
SRP_TSK_ABORT_TASK) == 0)
ret = SUCCESS;
- else if (target->transport_offline)
+ else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
else
ret = FAILED;
@@ -1804,14 +1861,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- int ret = FAILED;
shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
- if (!srp_reconnect_target(target))
- ret = SUCCESS;
-
- return ret;
+ return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
static int srp_slave_configure(struct scsi_device *sdev)
@@ -2626,6 +2679,12 @@ static void srp_remove_one(struct ib_device *device)
}
static struct srp_function_template ib_srp_transport_functions = {
+ .has_rport_state = true,
+ .reset_timer_if_blocked = true,
+ .reconnect_delay = &srp_reconnect_delay,
+ .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
+ .dev_loss_tmo = &srp_dev_loss_tmo,
+ .reconnect = srp_rport_reconnect,
.rport_delete = srp_rport_delete,
.terminate_rport_io = srp_terminate_io,
};
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 02392f5..b62a943 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -140,7 +140,6 @@ struct srp_target_port {
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
- bool transport_offline;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
--
1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2013-08-20 12:46 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-20 12:41 [PATCH 0/8] IB SRP initiator patches for kernel 3.12 Bart Van Assche
[not found] ` <521363EA.8080906-HInyCGIudOg@public.gmane.org>
2013-08-20 12:43 ` [PATCH 1/8] IB/srp: Keep rport as long as the IB transport layer Bart Van Assche
2013-08-20 12:44 ` [PATCH 2/8] scsi_transport_srp: Add transport layer error handling Bart Van Assche
2013-08-20 12:45 ` [PATCH 3/8] IB/srp: Add srp_terminate_io() Bart Van Assche
2013-08-20 12:46 ` Bart Van Assche [this message]
2013-08-20 12:46 ` [PATCH 5/8] IB/srp: Start timers if a transport layer error occurs Bart Van Assche
2013-08-20 12:47 ` [PATCH 6/8] IB/srp: Make transport layer retry count configurable Bart Van Assche
2013-08-20 12:48 ` [PATCH 7/8] IB/srp: Introduce srp_alloc_req_data() Bart Van Assche
2013-08-20 12:50 ` [PATCH 8/8] IB/srp: Make queue size configurable Bart Van Assche
[not found] ` <52136609.3090406-HInyCGIudOg@public.gmane.org>
2013-08-20 15:34 ` Sagi Grimberg
[not found] ` <52138C6E.6080201-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-08-20 15:55 ` Bart Van Assche
[not found] ` <5213917B.3020403-HInyCGIudOg@public.gmane.org>
2013-08-20 17:43 ` David Dillow
[not found] ` <1377020595.22321.6.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-08-21 7:19 ` Sagi Grimberg
2013-09-10 3:01 ` David Dillow
[not found] ` <1378782080.3794.6.camel-VK19RVc5TWXUd6DVheFtbw@public.gmane.org>
2013-09-10 17:44 ` Bart Van Assche
[not found] ` <522F5A81.8040101-HInyCGIudOg@public.gmane.org>
2013-09-11 22:16 ` David Dillow
[not found] ` <1378937796.6649.5.camel-a7a0dvSY7KqLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2013-09-12 16:16 ` Jack Wang
[not found] ` <5231E8CE.5060105-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-09-12 16:30 ` Bart Van Assche
[not found] ` <5231EC1A.7030902-HInyCGIudOg@public.gmane.org>
2013-09-13 8:06 ` Jack Wang
[not found] ` <5232C76B.4010704-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-09-13 8:40 ` Bart Van Assche
[not found] ` <5232CF86.20507-HInyCGIudOg@public.gmane.org>
2013-09-13 9:24 ` Bart Van Assche
[not found] ` <5232D9BC.7090808-HInyCGIudOg@public.gmane.org>
2013-09-13 12:25 ` Jack Wang
[not found] ` <5233043F.5020804-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-09-13 13:33 ` Bart Van Assche
[not found] ` <52331444.8070007-HInyCGIudOg@public.gmane.org>
2013-09-13 13:51 ` Jack Wang
[not found] ` <52331854.9010607-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-09-13 14:03 ` Bart Van Assche
[not found] ` <52331B47.9070202-HInyCGIudOg@public.gmane.org>
2013-09-13 14:15 ` Jack Wang
[not found] ` <52331E01.3060005-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-09-13 14:31 ` Jack Wang
2013-09-13 14:31 ` Bart Van Assche
2013-09-16 14:25 ` Bart Van Assche
[not found] ` <523714D8.3020104-HInyCGIudOg@public.gmane.org>
2013-09-16 14:28 ` David Dillow
2013-09-10 2:53 ` [PATCH 0/8] IB SRP initiator patches for kernel 3.12 David Dillow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=52136509.3050703@acm.org \
--to=bvanassche-hinycgiudog@public.gmane.org \
--cc=dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
--cc=sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org \
--cc=vuhuong-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.