From: Vu Pham <vuhuong-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
To: Linux RDMA list <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: [ofa-general][PATCH 3/4] SRP fail-over faster
Date: Mon, 12 Oct 2009 15:57:23 -0700 [thread overview]
Message-ID: <4AD3B453.3030109@mellanox.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: [ofa-general][PATCH 3/4] SRP fail-over faster,.eml --]
[-- Type: message/rfc822, Size: 6377 bytes --]
[-- Attachment #2.1.1: Type: text/plain, Size: 396 bytes --]
Introduce the srp_dev_loss_tmo module parameter and create a timer that
cleans up the connection after srp_dev_loss_tmo has expired. During
srp_dev_loss_tmo, while the QP is in the error state, SRP returns DID_RESET
for outstanding I/O, returns FAILED for abort_cmd and reset_lun, and returns
SUCCESS (without trying to reconnect) for reset_host.
Signed-off-by: Vu Pham <vu-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
[-- Attachment #2.1.2: srp_3_dev_loss_tmo.patch --]
[-- Type: text/plain, Size: 5040 bytes --]
Index: ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/srp/ib_srp.c
+++ ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.c
@@ -78,6 +77,13 @@
MODULE_PARM_DESC(mellanox_workarounds,
"Enable workarounds for Mellanox SRP target bugs if != 0");
+/*
+ * Number of seconds the SRP transport insulates the loss of a remote port.
+ * Exposed as a module parameter with 0444 permissions; defaults to 60.
+ */
+static int srp_dev_loss_tmo = 60;
+
+module_param(srp_dev_loss_tmo, int, 0444);
+/*
+ * Adjacent string literals are used instead of a backslash continuation so
+ * that no source indentation ends up inside the description text.
+ */
+MODULE_PARM_DESC(srp_dev_loss_tmo,
+	"Default number of seconds that srp transport should "
+	"insulate the loss of a remote port (default is 60 secs)");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_completion(struct ib_cq *cq, void *target_ptr);
@@ -898,6 +926,48 @@
DMA_FROM_DEVICE);
}
+/*
+ * Work handler scheduled by srp_qp_in_err_timer(): reconnect the target
+ * that embeds this work item, then clear work_in_progress so the work can
+ * be scheduled again.
+ */
+static void srp_reconnect_work(struct work_struct *work)
+{
+	struct srp_target_port *target =
+		container_of(work, struct srp_target_port, work);
+
+	srp_reconnect_target(target);
+
+	/*
+	 * The timer callback tests and sets work_in_progress under host_lock,
+	 * so clear it under the same lock.
+	 */
+	spin_lock_irq(target->scsi_host->host_lock);
+	target->work_in_progress = 0;
+	spin_unlock_irq(target->scsi_host->host_lock);
+}
+
+/*
+ * Timer callback installed by srp_qp_err_add_timer().
+ *
+ * @data: the struct srp_target_port pointer, cast to unsigned long.
+ *
+ * When the target is still in SRP_TARGET_LIVE state, reset every request on
+ * req_queue and schedule srp_reconnect_work() unless that work is already
+ * marked in progress. Does nothing for any other target state.
+ */
+static void srp_qp_in_err_timer(unsigned long data)
+{
+	struct srp_target_port *target = (struct srp_target_port *)data;
+	struct srp_request *req, *tmp;
+
+	/*
+	 * Single critical section: the state check, the request reset and the
+	 * work_in_progress test-and-set all happen under host_lock, with no
+	 * window in between where the lock is dropped.
+	 */
+	spin_lock_irq(target->scsi_host->host_lock);
+	if (target->state != SRP_TARGET_LIVE)
+		goto out;
+
+	list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+		srp_reset_req(target, req);
+
+	if (!target->work_in_progress) {
+		target->work_in_progress = 1;
+		INIT_WORK(&target->work, srp_reconnect_work);
+		schedule_work(&target->work);
+	}
+out:
+	spin_unlock_irq(target->scsi_host->host_lock);
+}
+
+/*
+ * Arm target->qp_err_timer so that srp_qp_in_err_timer() runs for @target
+ * @time seconds from now. Leaves an already pending timer untouched.
+ */
+static void srp_qp_err_add_timer(struct srp_target_port *target, int time)
+{
+	struct timer_list *timer = &target->qp_err_timer;
+
+	if (timer_pending(timer))
+		return;
+
+	setup_timer(timer, srp_qp_in_err_timer, (unsigned long)target);
+	timer->expires = time * HZ + jiffies;
+	add_timer(timer);
+}
+
static void srp_completion(struct ib_cq *cq, void *target_ptr)
{
struct srp_target_port *target = target_ptr;
@@ -960,11 +980,20 @@
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (wc.status) {
+ unsigned long flags;
+
shost_printk(KERN_ERR, target->scsi_host,
PFX "failed %s status %d\n",
wc.wr_id & SRP_OP_RECV ? "receive" : "send",
wc.status);
- target->qp_in_error = 1;
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (!target->qp_in_error &&
+ target->state == SRP_TARGET_LIVE) {
+ target->qp_in_error = 1;
+ srp_qp_err_add_timer(target,
+ srp_dev_loss_tmo - 55);
+ }
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
break;
}
@@ -1274,5 +1299,6 @@
int attr_mask = 0;
int comp = 0;
int opcode = 0;
+ unsigned long flags;
switch (event->event) {
@@ -1301,6 +1381,14 @@
shost_printk(KERN_ERR, target->scsi_host,
PFX "connection closed\n");
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (!target->qp_in_error &&
+ target->state == SRP_TARGET_LIVE) {
+ target->qp_in_error = 1;
+ srp_qp_err_add_timer(target,
+ srp_dev_loss_tmo - 55);
+ }
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
target->status = 0;
break;
@@ -1443,9 +1529,22 @@
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_request *req, *tmp;
int ret = FAILED;
- shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "SRP reset_host called state %d qp_err %d\n",
+ target->state, target->qp_in_error);
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (timer_pending(&target->qp_err_timer) || target->qp_in_error ||
+ target->state != SRP_TARGET_LIVE) {
+ list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+ srp_reset_req(target, req);
+ spin_unlock_irq(target->scsi_host->host_lock);
+ return SUCCESS;
+ }
+ spin_unlock_irq(target->scsi_host->host_lock);
if (!srp_reconnect_target(target))
ret = SUCCESS;
@@ -2150,6 +2342,9 @@
sizeof (struct srp_indirect_buf) +
srp_sg_tablesize * 16);
+ if (srp_dev_loss_tmo < 60)
+ srp_dev_loss_tmo = 60;
+
ret = class_register(&srp_class);
if (ret) {
printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
Index: ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/srp/ib_srp.h
+++ ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.h
@@ -153,12 +159,14 @@
struct srp_request req_ring[SRP_SQ_SIZE];
struct work_struct work;
+ int work_in_progress;
struct list_head list;
struct completion done;
int status;
enum srp_target_state state;
int qp_in_error;
+ struct timer_list qp_err_timer;
};
struct srp_iu {
next reply other threads:[~2009-10-12 22:57 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-12 22:57 Vu Pham [this message]
[not found] ` <4AD3B453.3030109-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-13 11:09 ` [ofa-general][PATCH 3/4] SRP fail-over faster Bart Van Assche
2009-10-14 18:12 ` Roland Dreier
[not found] ` <ada1vl5alqh.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2009-10-14 20:37 ` Vu Pham
[not found] ` <4AD63681.6080901-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-14 20:52 ` Roland Dreier
[not found] ` <adaljjd8zrj.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2009-10-14 21:08 ` Vu Pham
[not found] ` <4AD63DB1.3060906-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-14 22:47 ` Roland Dreier
[not found] ` <adahbu18uf5.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2009-10-14 23:59 ` Vu Pham
2009-10-15 1:39 ` David Dillow
[not found] ` <1255570760.13845.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2009-10-15 16:23 ` Vu Pham
[not found] ` <4AD74C88.8030604-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-15 19:25 ` David Dillow
[not found] ` <1255634715.29829.9.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-15 21:35 ` Jason Gunthorpe
[not found] ` <20091015213512.GW5191-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2009-10-22 23:13 ` Vu Pham
[not found] ` <4AE0E71E.20309-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-22 23:33 ` David Dillow
[not found] ` <1256254394.1579.86.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-22 23:34 ` David Dillow
[not found] ` <1256254459.1579.87.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-22 23:38 ` David Dillow
[not found] ` <1256254692.1579.89.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-23 0:04 ` Vu Pham
[not found] ` <4AE0F309.5040201-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-23 0:16 ` David Dillow
[not found] ` <1256256984.1579.105.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-23 0:24 ` Vu Pham
[not found] ` <4AE0F7DA.20100-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-23 0:34 ` David Dillow
[not found] ` <1256258049.1598.8.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-23 16:50 ` Vu Pham
[not found] ` <4AE1DEEF.5070205-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-23 22:08 ` David Dillow
[not found] ` <1256335698.10273.62.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2009-10-24 7:35 ` Vu Pham
[not found] ` <4AE2AE54.5020004-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2009-10-28 15:09 ` David Dillow
2009-10-29 18:42 ` Vladislav Bolkhovitin
2009-10-23 6:13 ` Bart Van Assche
[not found] ` <e2e108260910222313o27c8b97dh483d846b6c98e480-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-10-23 16:52 ` Vu Pham
2009-10-28 18:00 ` Roland Dreier
[not found] ` <adavdhzs8iv.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2009-10-29 16:37 ` Vu Pham
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AD3B453.3030109@mellanox.com \
--to=vuhuong-vpraknaxozvwk0htik3j/w@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.