[PATCH 06/10] scsi_transport_srp: Add periodic reconnect support

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
To: David Dillow <dillowda-1Heg1YXhbW8@public.gmane.org>,
	Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Vu Pham <vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Sebastian Riemer
	<sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>,
	linux-rdma <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	linux-scsi <linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	James Bottomley
	<jbottomley-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
Subject: [PATCH 06/10] scsi_transport_srp: Add periodic reconnect support
Date: Thu, 10 Oct 2013 14:15:49 +0200	[thread overview]
Message-ID: <52569A75.30200@acm.org> (raw)
In-Reply-To: <52569806.9080201-HInyCGIudOg@public.gmane.org>

Add support for periodically reconnecting to an SRP target until
the dev_loss timer expires. After the tenth reconnection attempt,
gradually slow down subsequent reconnect attempts.

Signed-off-by: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
Cc: David Dillow <dillowda-1Heg1YXhbW8@public.gmane.org>
Cc: Roland Dreier <roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: James Bottomley <JBottomley-MU7nAjRaF3makBO8gow8eQ@public.gmane.org>
Cc: Vu Pham <vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: Sebastian Riemer <sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
---
 drivers/scsi/scsi_transport_srp.c | 106 +++++++++++++++++++++++++++++++++++---
 include/scsi/scsi_transport_srp.h |  11 +++-
 2 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 675e203..04a97b6 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -41,7 +41,7 @@ struct srp_host_attrs {
 #define to_srp_host_attrs(host)	((struct srp_host_attrs *)(host)->shost_data)
 
 #define SRP_HOST_ATTRS 0
-#define SRP_RPORT_ATTRS 6
+#define SRP_RPORT_ATTRS 8
 
 struct srp_internal {
 	struct scsi_transport_template t;
@@ -69,11 +69,13 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
  * are finished in a reasonable time. Hence do not allow the fast I/O fail
  * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
  * parameters must be such that multipath can detect failed paths timely.
- * Hence do not allow both parameters to be disabled simultaneously.
+ * Hence do not allow all three parameters to be disabled simultaneously.
  */
-int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo)
+int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo)
 {
-	if (fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
+	if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
+		return -EINVAL;
+	if (reconnect_delay == 0)
 		return -EINVAL;
 	if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
 		return -EINVAL;
@@ -202,6 +204,56 @@ static int srp_parse_tmo(int *tmo, const char *buf)
 	return res;
 }
 
+static ssize_t show_reconnect_delay(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+
+	return srp_show_tmo(buf, rport->reconnect_delay);
+}
+
+static ssize_t store_reconnect_delay(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, const size_t count)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+	int res, delay;
+
+	res = srp_parse_tmo(&delay, buf);
+	if (res)
+		goto out;
+	res = srp_tmo_valid(delay, rport->fast_io_fail_tmo,
+			    rport->dev_loss_tmo);
+	if (res)
+		goto out;
+
+	if (rport->reconnect_delay <= 0 && delay > 0 &&
+	    rport->state != SRP_RPORT_RUNNING) {
+		queue_delayed_work(system_long_wq, &rport->reconnect_work,
+				   delay * HZ);
+	} else if (delay <= 0) {
+		cancel_delayed_work(&rport->reconnect_work);
+	}
+	rport->reconnect_delay = delay;
+	res = count;
+
+out:
+	return res;
+}
+
+static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, show_reconnect_delay,
+		   store_reconnect_delay);
+
+static ssize_t show_failed_reconnects(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+
+	return sprintf(buf, "%d\n", rport->failed_reconnects);
+}
+
+static DEVICE_ATTR(failed_reconnects, S_IRUGO, show_failed_reconnects, NULL);
+
 static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev,
 					       struct device_attribute *attr,
 					       char *buf)
@@ -222,7 +274,8 @@ static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev,
 	res = srp_parse_tmo(&fast_io_fail_tmo, buf);
 	if (res)
 		goto out;
-	res = srp_tmo_valid(fast_io_fail_tmo, rport->dev_loss_tmo);
+	res = srp_tmo_valid(rport->reconnect_delay, fast_io_fail_tmo,
+			    rport->dev_loss_tmo);
 	if (res)
 		goto out;
 	rport->fast_io_fail_tmo = fast_io_fail_tmo;
@@ -256,7 +309,8 @@ static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev,
 	res = srp_parse_tmo(&dev_loss_tmo, buf);
 	if (res)
 		goto out;
-	res = srp_tmo_valid(rport->fast_io_fail_tmo, dev_loss_tmo);
+	res = srp_tmo_valid(rport->reconnect_delay, rport->fast_io_fail_tmo,
+			    dev_loss_tmo);
 	if (res)
 		goto out;
 	rport->dev_loss_tmo = dev_loss_tmo;
@@ -312,6 +366,29 @@ invalid:
 	return -EINVAL;
 }
 
+/**
+ * srp_reconnect_work() - reconnect and schedule a new attempt if necessary
+ */
+static void srp_reconnect_work(struct work_struct *work)
+{
+	struct srp_rport *rport = container_of(to_delayed_work(work),
+					struct srp_rport, reconnect_work);
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	int delay, res;
+
+	res = srp_reconnect_rport(rport);
+	if (res != 0) {
+		shost_printk(KERN_ERR, shost,
+			     "reconnect attempt %d failed (%d)\n",
+			     ++rport->failed_reconnects, res);
+		delay = rport->reconnect_delay *
+			min(100, max(1, rport->failed_reconnects - 10));
+		if (delay > 0)
+			queue_delayed_work(system_long_wq,
+					   &rport->reconnect_work, delay * HZ);
+	}
+}
+
 static void __rport_fail_io_fast(struct srp_rport *rport)
 {
 	struct Scsi_Host *shost = rport_to_shost(rport);
@@ -370,16 +447,21 @@ static void rport_dev_loss_timedout(struct work_struct *work)
 static void __srp_start_tl_fail_timers(struct srp_rport *rport)
 {
 	struct Scsi_Host *shost = rport_to_shost(rport);
-	int fast_io_fail_tmo, dev_loss_tmo;
+	int delay, fast_io_fail_tmo, dev_loss_tmo;
 
 	lockdep_assert_held(&rport->mutex);
 
 	if (!rport->deleted) {
+		delay = rport->reconnect_delay;
 		fast_io_fail_tmo = rport->fast_io_fail_tmo;
 		dev_loss_tmo = rport->dev_loss_tmo;
 		pr_debug("%s current state: %d\n",
 			 dev_name(&shost->shost_gendev), rport->state);
 
+		if (delay > 0)
+			queue_delayed_work(system_long_wq,
+					   &rport->reconnect_work,
+					   1UL * delay * HZ);
 		if (fast_io_fail_tmo >= 0 &&
 		    srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
 			pr_debug("%s new state: %d\n",
@@ -480,6 +562,7 @@ int srp_reconnect_rport(struct srp_rport *rport)
 		cancel_delayed_work(&rport->fast_io_fail_work);
 		cancel_delayed_work(&rport->dev_loss_work);
 
+		rport->failed_reconnects = 0;
 		srp_rport_set_state(rport, SRP_RPORT_RUNNING);
 		scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING);
 		/*
@@ -538,6 +621,7 @@ static void srp_rport_release(struct device *dev)
 {
 	struct srp_rport *rport = dev_to_rport(dev);
 
+	cancel_delayed_work_sync(&rport->reconnect_work);
 	cancel_delayed_work_sync(&rport->fast_io_fail_work);
 	cancel_delayed_work_sync(&rport->dev_loss_work);
 
@@ -634,6 +718,10 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
 	memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id));
 	rport->roles = ids->roles;
 
+	if (i->f->reconnect)
+		rport->reconnect_delay = i->f->reconnect_delay ?
+			*i->f->reconnect_delay : 10;
+	INIT_DELAYED_WORK(&rport->reconnect_work, srp_reconnect_work);
 	rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ?
 		*i->f->fast_io_fail_tmo : 15;
 	rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60;
@@ -772,6 +860,10 @@ srp_attach_transport(struct srp_function_template *ft)
 		i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo;
 		i->rport_attrs[count++] = &dev_attr_dev_loss_tmo;
 	}
+	if (ft->reconnect) {
+		i->rport_attrs[count++] = &dev_attr_reconnect_delay;
+		i->rport_attrs[count++] = &dev_attr_failed_reconnects;
+	}
 	if (ft->rport_delete)
 		i->rport_attrs[count++] = &dev_attr_delete;
 	i->rport_attrs[count++] = NULL;
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index ee70016..4ebf691 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -31,7 +31,8 @@ enum srp_rport_state {
 /**
  * struct srp_rport
  * @lld_data: LLD private data.
- * @mutex:    Protects against concurrent rport fast_io_fail / dev_loss_tmo.
+ * @mutex:    Protects against concurrent rport reconnect / fast_io_fail /
+ *   dev_loss_tmo activity.
  */
 struct srp_rport {
 	/* for initiator and target drivers */
@@ -48,6 +49,9 @@ struct srp_rport {
 	struct mutex		mutex;
 	enum srp_rport_state	state;
 	bool			deleted;
+	int			reconnect_delay;
+	int			failed_reconnects;
+	struct delayed_work	reconnect_work;
 	int			fast_io_fail_tmo;
 	int			dev_loss_tmo;
 	struct delayed_work	fast_io_fail_work;
@@ -60,6 +64,7 @@ struct srp_rport {
  *     dev_loss_tmo sysfs attribute for an rport.
  * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command
  *     timer if the device on which it has been queued is blocked.
+ * @reconnect_delay: If not NULL, points to the default reconnect_delay value.
  * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value.
  * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value.
  * @reconnect: Callback function for reconnecting to the target. See also
@@ -71,6 +76,7 @@ struct srp_function_template {
 	/* for initiator drivers */
 	bool has_rport_state;
 	bool reset_timer_if_blocked;
+	int *reconnect_delay;
 	int *fast_io_fail_tmo;
 	int *dev_loss_tmo;
 	int (*reconnect)(struct srp_rport *rport);
@@ -90,7 +96,8 @@ extern void srp_rport_put(struct srp_rport *rport);
 extern struct srp_rport *srp_rport_add(struct Scsi_Host *,
 				       struct srp_rport_identifiers *);
 extern void srp_rport_del(struct srp_rport *);
-extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo);
+extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
+			 int dev_loss_tmo);
 extern int srp_reconnect_rport(struct srp_rport *rport);
 extern void srp_start_tl_fail_timers(struct srp_rport *rport);
 extern void srp_remove_host(struct Scsi_Host *);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

next prev parent reply	other threads:[~2013-10-10 12:15 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-10 12:05 [PATCH 0/10] IB SRP initiator patches for kernel 3.12 Bart Van Assche
2013-10-10 12:10 ` [PATCH 02/10] IB/srp: Keep rport as long as the IB transport layer Bart Van Assche
     [not found] ` <52569806.9080201-HInyCGIudOg@public.gmane.org>
2013-10-10 12:08   ` [PATCH 01/10] IB/srp: Make transport layer retry count configurable Bart Van Assche
2013-10-10 12:12   ` [PATCH 03/10] scsi_transport_srp: Add transport layer error handling Bart Van Assche
     [not found]     ` <525699A9.6090802-HInyCGIudOg@public.gmane.org>
2013-10-19 16:13       ` Bart Van Assche
     [not found]         ` <5262AF8D.7070700-HInyCGIudOg@public.gmane.org>
2013-10-25 22:42           ` David Dillow
2013-10-26  6:30             ` Bart Van Assche
2013-10-10 12:13   ` [PATCH 04/10] IB/srp: Use SRP transport layer error recovery Bart Van Assche
2013-10-10 12:14   ` [PATCH 05/10] IB/srp: Start timers if a transport layer error occurs Bart Van Assche
2013-10-10 12:15   ` Bart Van Assche [this message]
2013-10-10 12:16   ` [PATCH 07/10] IB/srp: Add periodic reconnect functionality Bart Van Assche
2013-10-10 12:18   ` [PATCH 08/10] IB/srp: Export sgid to sysfs Bart Van Assche
2013-10-10 12:18   ` [PATCH 09/10] IB/srp: Introduce srp_alloc_req_data() Bart Van Assche
2013-10-10 12:19   ` [PATCH 10/10] IB/srp: Make queue size configurable Bart Van Assche
     [not found]     ` <52569B6A.30301-HInyCGIudOg@public.gmane.org>
2013-10-11 11:34       ` Jack Wang
2013-10-25 22:13       ` David Dillow
     [not found]         ` <1382739213.17280.3.camel-zHLflQxYYDO4Hhoo1DtQwJ9G+ZOsUmrO@public.gmane.org>
2013-10-26  6:13           ` Bart Van Assche
2013-10-25 22:14   ` [PATCH 0/10] IB SRP initiator patches for kernel 3.12 David Dillow

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:675e203 dfblob:04a97b6 dfblob:ee70016 dfblob:4ebf691 )
 OR (
bs:"[PATCH 06/10] scsi_transport_srp: Add periodic reconnect support" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52569A75.30200@acm.org \
    --to=bvanassche-hinycgiudog@public.gmane.org \
    --cc=dillowda-1Heg1YXhbW8@public.gmane.org \
    --cc=jbottomley-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=sebastian.riemer-EIkl63zCoXaH+58JC4qpiA@public.gmane.org \
    --cc=vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.