[PATCH] IB/srp: Fix initiator lockup

public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] IB/srp: Fix initiator lockup
@ 2010-01-02 12:19 Bart Van Assche
       [not found] ` <e2e108261001020419l36319156hb9d625edc2e15d06-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-02 12:19 UTC (permalink / raw)
  To: OFED mailing list; +Cc: Roland Dreier, Chris Worley

When the SRP initiator is communicating with an SRP target under load it can
happen that the SRP initiator locks up. The communication pattern that causes
the lockup is as follows:
* SRP initiator sends (req_lim - 2) SRP_CMD requests to the target.
* The REQUEST LIMIT DELTA field of each SRP_RSP response is zero.
* The target sends an SRP_CRED_REQ information unit with non-zero REQUEST
  LIMIT DELTA.

The above communication pattern brings the initiator in the following state:
* srp_queuecommand() always returns SCSI_MLQUEUE_HOST_BUSY.
* The per-session variable zero_req_lim keeps increasing.
The initiator never leaves this state because it ignores SRP_CRED_REQ
information units.

This patch fixes this issue by adding support for SRP_CRED_REQ information
units in the SRP initiator. Additionally, this patch makes the per-session
variable req_lim visible through sysfs, which makes observing the initiator
state easier.

See also http://bugzilla.kernel.org/show_bug.cgi?id=14235

Signed-off-by: Bart Van Assche <bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Reported-by: Chris Worley <worleys-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c
b/drivers/infiniband/ulp/srp/ib_srp.c
index 54c8fe2..2006a0a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2009 Bart Van Assche <bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -53,6 +54,8 @@
 #define PFX		DRV_NAME ": "
 #define DRV_VERSION	"0.2"
 #define DRV_RELDATE	"November 1, 2005"
+/* Similar to is_power_of_2(), but can be evaluated at compile time. */
+#define IS_POWER_OF_2(n) ((n) != 0 && (((n) & ((n) - 1)) == 0))

 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
@@ -82,6 +85,11 @@ static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_completion(struct ib_cq *cq, void *target_ptr);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void srp_process_cred_req(struct srp_target_port *target,
+				 struct srp_cred_req *req);
+static void srp_process_aer_req(struct srp_target_port *target,
+				struct srp_cred_req *req);
+static int srp_post_recv(struct srp_target_port *target);

 static struct scsi_transport_template *ib_srp_transport_template;

@@ -237,7 +245,7 @@ static int srp_create_target_ib(struct
srp_target_port *target)
 	ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP);

 	init_attr->event_handler       = srp_qp_event;
-	init_attr->cap.max_send_wr     = SRP_SQ_SIZE;
+	init_attr->cap.max_send_wr     = SRP_SQ_SIZE + SRP_TXP_SIZE;
 	init_attr->cap.max_recv_wr     = SRP_RQ_SIZE;
 	init_attr->cap.max_recv_sge    = 1;
 	init_attr->cap.max_send_sge    = 1;
@@ -272,10 +280,12 @@ static void srp_free_target_ib(struct
srp_target_port *target)
 	ib_destroy_qp(target->qp);
 	ib_destroy_cq(target->cq);

-	for (i = 0; i < SRP_RQ_SIZE; ++i)
+	for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i)
 		srp_free_iu(target->srp_host, target->rx_ring[i]);
-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i)
+	for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i)
 		srp_free_iu(target->srp_host, target->tx_ring[i]);
+	for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i)
+		srp_free_iu(target->srp_host, target->txp_ring[i]);
 }

 static void srp_path_rec_completion(int status,
@@ -888,6 +898,14 @@ static void srp_handle_recv(struct
srp_target_port *target, struct ib_wc *wc)
 			     PFX "Got target logout request\n");
 		break;

+	case SRP_CRED_REQ:
+		srp_process_cred_req(target, iu->buf);
+		break;
+
+	case SRP_AER_REQ:
+		srp_process_aer_req(target, iu->buf);
+		break;
+
 	default:
 		shost_printk(KERN_WARNING, target->scsi_host,
 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
@@ -908,7 +926,11 @@ static void srp_completion(struct ib_cq *cq, void
*target_ptr)
 		if (wc.status) {
 			shost_printk(KERN_ERR, target->scsi_host,
 				     PFX "failed %s status %d\n",
-				     wc.wr_id & SRP_OP_RECV ? "receive" : "send",
+				     wc.wr_id & SRP_OP_RECV
+				     ? "receiving"
+				     : wc.wr_id & SRP_OP_TXP
+				     ? "sending response"
+				     : "sending request",
 				     wc.status);
 			target->qp_in_error = 1;
 			break;
@@ -916,6 +938,8 @@ static void srp_completion(struct ib_cq *cq, void
*target_ptr)

 		if (wc.wr_id & SRP_OP_RECV)
 			srp_handle_recv(target, &wc);
+		else if (wc.wr_id & SRP_OP_TXP)
+			++target->txp_tail;
 		else
 			++target->tx_tail;
 	}
@@ -961,15 +985,19 @@ static int srp_post_recv(struct srp_target_port *target)
 }

 /*
+ * Obtain an information unit for sending a request to the target.
+ *
  * Must be called with target->scsi_host->host_lock held to protect
  * req_lim and tx_head.  Lock cannot be dropped between call here and
- * call to __srp_post_send().
+ * call to __srp_post_send_req().
  */
 static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
 					enum srp_request_type req_type)
 {
 	s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2;

+	BUILD_BUG_ON(!IS_POWER_OF_2(ARRAY_SIZE(target->tx_ring)));
+
 	if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
 		return NULL;

@@ -978,26 +1006,31 @@ static struct srp_iu *__srp_get_tx_iu(struct
srp_target_port *target,
 		return NULL;
 	}

-	return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
+	return target->tx_ring[target->tx_head
+			       & (ARRAY_SIZE(target->tx_ring) - 1)];
 }

 /*
+ * Send a request to the target.
+ *
  * Must be called with target->scsi_host->host_lock held to protect
  * req_lim and tx_head.
  */
-static int __srp_post_send(struct srp_target_port *target,
-			   struct srp_iu *iu, int len)
+static int __srp_post_send_req(struct srp_target_port *target,
+			       struct srp_iu *iu, int len)
 {
 	struct ib_sge list;
 	struct ib_send_wr wr, *bad_wr;
 	int ret = 0;

+	BUILD_BUG_ON(!IS_POWER_OF_2(ARRAY_SIZE(target->tx_ring)));
+
 	list.addr   = iu->dma;
 	list.length = len;
 	list.lkey   = target->srp_host->srp_dev->mr->lkey;

 	wr.next       = NULL;
-	wr.wr_id      = target->tx_head & SRP_SQ_SIZE;
+	wr.wr_id      = target->tx_head & (ARRAY_SIZE(target->tx_ring) - 1);
 	wr.sg_list    = &list;
 	wr.num_sge    = 1;
 	wr.opcode     = IB_WR_SEND;
@@ -1013,6 +1046,117 @@ static int __srp_post_send(struct
srp_target_port *target,
 	return ret;
 }

+/*
+ * Obtain an information unit for sending a response to the target.
+ *
+ * Must be called with target->scsi_host->host_lock held to protect
+ * req_lim and tx_head.  Lock cannot be dropped between call here and
+ * call to __srp_post_send_rsp().
+ */
+static struct srp_iu *__srp_get_txp_iu(struct srp_target_port *target)
+{
+	BUILD_BUG_ON(!IS_POWER_OF_2(SRP_TXP_SIZE));
+
+	if (WARN_ON(target->txp_head - target->txp_tail >= SRP_TXP_SIZE))
+		return NULL;
+
+	return target->txp_ring[target->txp_head & (SRP_TXP_SIZE - 1)];
+}
+
+/*
+ * Send a response to a request received from the target.
+ *
+ * Must be called with target->scsi_host->host_lock held to protect txp_head.
+ */
+static int __srp_post_send_rsp(struct srp_target_port *target,
+			       struct srp_iu *iu, int len)
+{
+	struct ib_sge list;
+	struct ib_send_wr wr, *bad_wr;
+	int ret = 0;
+
+	BUILD_BUG_ON(!IS_POWER_OF_2(SRP_TXP_SIZE));
+
+	list.addr   = iu->dma;
+	list.length = len;
+	list.lkey   = target->srp_host->srp_dev->mr->lkey;
+
+	wr.next       = NULL;
+	wr.wr_id      = (target->txp_head & (SRP_TXP_SIZE - 1)) | SRP_OP_TXP;
+	wr.sg_list    = &list;
+	wr.num_sge    = 1;
+	wr.opcode     = IB_WR_SEND;
+	wr.send_flags = IB_SEND_SIGNALED;
+
+	ret = ib_post_send(target->qp, &wr, &bad_wr);
+
+	if (!ret)
+		++target->txp_head;
+
+	return ret;
+}
+
+static void srp_process_cred_req(struct srp_target_port *target,
+				 struct srp_cred_req *req)
+{
+	struct ib_device *dev;
+	struct srp_iu *iu;
+	struct srp_cred_rsp *rsp;
+	unsigned long flags;
+	int res;
+	s32 delta;
+	u64 tag;
+
+	dev = target->srp_host->srp_dev->dev;
+	delta = (s32) be32_to_cpu(req->req_lim_delta);
+	tag = req->tag;
+
+	spin_lock_irqsave(target->scsi_host->host_lock, flags);
+
+	target->req_lim += delta;
+
+	iu = __srp_get_txp_iu(target);
+	if (!iu)
+		goto out;
+
+	rsp = iu->buf;
+
+	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof(*rsp),
+				   DMA_TO_DEVICE);
+
+	memset(rsp, 0, sizeof *rsp);
+	rsp->opcode = SRP_CRED_RSP;
+	rsp->tag    = tag;
+
+	ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
+				      DMA_TO_DEVICE);
+
+	if (__srp_post_send_rsp(target, iu, sizeof(*rsp)))
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "Sending response failed\n");
+
+	res = __srp_post_recv(target);
+	if (res)
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "ib_post_recv() failed -- res = %d\n", res);
+
+out:
+	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+}
+
+static void srp_process_aer_req(struct srp_target_port *target,
+				 struct srp_cred_req *req)
+{
+	int res;
+
+	shost_printk(KERN_ERR, target->scsi_host,
+		     PFX "received and ignored SRP_AER_REQ\n");
+	res = srp_post_recv(target);
+	if (res)
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "ib_post_recv() failed -- res = %d\n", res);
+}
+
 static int srp_queuecommand(struct scsi_cmnd *scmnd,
 			    void (*done)(struct scsi_cmnd *))
 {
@@ -1075,8 +1219,9 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
 	ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
 				      DMA_TO_DEVICE);

-	if (__srp_post_send(target, iu, len)) {
-		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
+	if (__srp_post_send_req(target, iu, len)) {
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "Sending request failed\n");
 		goto err_unmap;
 	}

@@ -1095,7 +1240,7 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
 {
 	int i;

-	for (i = 0; i < SRP_RQ_SIZE; ++i) {
+	for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i) {
 		target->rx_ring[i] = srp_alloc_iu(target->srp_host,
 						  target->max_ti_iu_len,
 						  GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1103,7 +1248,7 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
 			goto err;
 	}

-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+	for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i) {
 		target->tx_ring[i] = srp_alloc_iu(target->srp_host,
 						  srp_max_iu_len,
 						  GFP_KERNEL, DMA_TO_DEVICE);
@@ -1111,19 +1256,32 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
 			goto err;
 	}

+	for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i) {
+		target->txp_ring[i] = srp_alloc_iu(target->srp_host,
+						  srp_max_iu_len,
+						  GFP_KERNEL, DMA_TO_DEVICE);
+		if (!target->txp_ring[i])
+			goto err;
+	}
+
 	return 0;

 err:
-	for (i = 0; i < SRP_RQ_SIZE; ++i) {
+	for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i) {
 		srp_free_iu(target->srp_host, target->rx_ring[i]);
 		target->rx_ring[i] = NULL;
 	}

-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+	for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i) {
 		srp_free_iu(target->srp_host, target->tx_ring[i]);
 		target->tx_ring[i] = NULL;
 	}

+	for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i) {
+		srp_free_iu(target->srp_host, target->txp_ring[i]);
+		target->tx_ring[i] = NULL;
+	}
+
 	return -ENOMEM;
 }

@@ -1211,6 +1369,7 @@ static int srp_cm_handler(struct ib_cm_id
*cm_id, struct ib_cm_event *event)
 {
 	struct srp_target_port *target = cm_id->context;
 	struct ib_qp_attr *qp_attr = NULL;
+	int i;
 	int attr_mask = 0;
 	int comp = 0;
 	int opcode = 0;
@@ -1263,7 +1422,11 @@ static int srp_cm_handler(struct ib_cm_id
*cm_id, struct ib_cm_event *event)
 		if (target->status)
 			break;

-		target->status = srp_post_recv(target);
+		for (i = 0; i < SRP_RXR_SIZE; ++i) {
+			target->status = srp_post_recv(target);
+			if (target->status)
+				break;
+		}
 		if (target->status)
 			break;

@@ -1353,7 +1516,7 @@ static int srp_send_tsk_mgmt(struct
srp_target_port *target,
 	tsk_mgmt->tsk_mgmt_func = func;
 	tsk_mgmt->task_tag 	= req->index;

-	if (__srp_post_send(target, iu, sizeof *tsk_mgmt))
+	if (__srp_post_send_req(target, iu, sizeof *tsk_mgmt))
 		goto out;

 	req->tsk_mgmt = iu;
@@ -1529,6 +1692,18 @@ static ssize_t show_orig_dgid(struct device *dev,
 	return sprintf(buf, "%pI6\n", target->orig_dgid);
 }

+static ssize_t show_req_lim(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", target->req_lim);
+}
+
 static ssize_t show_zero_req_lim(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
@@ -1563,6 +1738,7 @@ static DEVICE_ATTR(service_id,	    S_IRUGO,
show_service_id,	   NULL);
 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
+static DEVICE_ATTR(req_lim,	    S_IRUGO, show_req_lim,	   NULL);
 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
@@ -1574,6 +1750,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_pkey,
 	&dev_attr_dgid,
 	&dev_attr_orig_dgid,
+	&dev_attr_req_lim,
 	&dev_attr_zero_req_lim,
 	&dev_attr_local_ib_port,
 	&dev_attr_local_ib_device,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h
b/drivers/infiniband/ulp/srp/ib_srp.h
index e185b90..c553a03 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,10 +57,20 @@ enum {
 	SRP_MAX_LUN		= 512,
 	SRP_DEF_SG_TABLESIZE	= 12,

+	/*
+	 * Receive queue: queue for receiving responses and requests from the
+	 * target.
+	 */
 	SRP_RQ_SHIFT    	= 6,
 	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
-	SRP_SQ_SIZE		= SRP_RQ_SIZE - 1,
-	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_RQ_SIZE,
+	/* Number of receive slots reserved for receiving requests. */
+	SRP_RXR_SIZE		= 2,
+	/* Send queue: queue for sending requests to the target. */
+	SRP_SQ_SIZE		= SRP_RQ_SIZE - SRP_RXR_SIZE - 1,
+	/* Size of the queue for sending responses to the target. */
+	SRP_TXP_SIZE            = 2,
+	/* Completion queue. */
+	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_TXP_SIZE + SRP_RQ_SIZE,

 	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1),

@@ -69,6 +79,9 @@ enum {
 	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4
 };

+/* wr_id / wc_id flag for marking responses sent to the target. */
+#define SRP_OP_TXP		(1 << 30)
+/* wr_id / wc_id flag for marking receive operations. */
 #define SRP_OP_RECV		(1 << 31)

 enum srp_target_state {
@@ -141,12 +154,20 @@ struct srp_target_port {

 	int			zero_req_lim;

+
+	/* Receive ring for responses and requests received by the initiator. */
 	unsigned		rx_head;
 	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];

+	/* Transmit ring for requests sent to the target. */
 	unsigned		tx_head;
 	unsigned		tx_tail;
-	struct srp_iu	       *tx_ring[SRP_SQ_SIZE + 1];
+	struct srp_iu	       *tx_ring[SRP_SQ_SIZE + SRP_RXR_SIZE + 1];
+
+	/* Transmit ring for responses sent to the target. */
+	unsigned		txp_head;
+	unsigned		txp_tail;
+	struct srp_iu	       *txp_ring[SRP_TXP_SIZE];

 	struct list_head	free_reqs;
 	struct list_head	req_queue;
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index ad178fa..55c16c7 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -239,4 +239,26 @@ struct srp_rsp {
 	u8	data[0];
 } __attribute__((packed));

+/*
+ * SRP_CRED_REQ information unit, as defined in section 6.10 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_req {
+	u8	opcode;
+	u8	sol_not;
+	u8	reserved[2];
+	__be32	req_lim_delta;
+	u64	tag;
+} __attribute__((packed));
+
+/*
+ * SRP_CRED_RSP information unit, as defined in section 6.11 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_rsp {
+	u8	opcode;
+	u8	reserved[7];
+	u64	tag;
+} __attribute__((packed));
+
 #endif /* SCSI_SRP_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Fwd: [PATCH] IB/srp: Fix initiator lockup
       [not found] ` <e2e108261001020419l36319156hb9d625edc2e15d06-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-02 16:05   ` Chris Worley
       [not found]     ` <f3177b9e1001020805k4dce1991u3733c5a7f6d46aaa-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-04  1:34   ` David Dillow
  2010-01-06 21:37   ` Roland Dreier
  2 siblings, 1 reply; 35+ messages in thread
From: Chris Worley @ 2010-01-02 16:05 UTC (permalink / raw)
  To: ofw-ZwoEplunGu1OwGhvXhtEPSCwEArCW2h5; +Cc: OFED mailing list

Would this fix effect the Window's SRP initiator too?

Thanks for this fix, Bart!

Chris
---------- Forwarded message ----------
From: Bart Van Assche <bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Sat, Jan 2, 2010 at 5:19 AM
Subject: [PATCH] IB/srp: Fix initiator lockup
To: OFED mailing list <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Cc: Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>, Chris Worley <worleys-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>


When the SRP initiator is communicating with an SRP target under load it can
happen that the SRP initiator locks up. The communication pattern that causes
the lockup is as follows:
* SRP initiator sends (req_lim - 2) SRP_CMD requests to the target.
* The REQUEST LIMIT DELTA field of each SRP_RSP response is zero.
* The target sends an SRP_CRED_REQ information unit with non-zero REQUEST
 LIMIT DELTA.

The above communication pattern brings the initiator in the following state:
* srp_queuecommand() always returns SCSI_MLQUEUE_HOST_BUSY.
* The per-session variable zero_req_lim keeps increasing.
The initiator never leaves this state because it ignores SRP_CRED_REQ
information units.

This patch fixes this issue by adding support for SRP_CRED_REQ information
units in the SRP initiator. Additionally, this patch makes the per-session
variable req_lim visible through sysfs, which makes observing the initiator
state easier.

See also http://bugzilla.kernel.org/show_bug.cgi?id=14235

Signed-off-by: Bart Van Assche <bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Reported-by: Chris Worley <worleys-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c
b/drivers/infiniband/ulp/srp/ib_srp.c
index 54c8fe2..2006a0a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2009 Bart Van Assche <bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -53,6 +54,8 @@
 #define PFX            DRV_NAME ": "
 #define DRV_VERSION    "0.2"
 #define DRV_RELDATE    "November 1, 2005"
+/* Similar to is_power_of_2(), but can be evaluated at compile time. */
+#define IS_POWER_OF_2(n) ((n) != 0 && (((n) & ((n) - 1)) == 0))

 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
@@ -82,6 +85,11 @@ static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_completion(struct ib_cq *cq, void *target_ptr);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void srp_process_cred_req(struct srp_target_port *target,
+                                struct srp_cred_req *req);
+static void srp_process_aer_req(struct srp_target_port *target,
+                               struct srp_cred_req *req);
+static int srp_post_recv(struct srp_target_port *target);

 static struct scsi_transport_template *ib_srp_transport_template;

@@ -237,7 +245,7 @@ static int srp_create_target_ib(struct
srp_target_port *target)
       ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP);

       init_attr->event_handler       = srp_qp_event;
-       init_attr->cap.max_send_wr     = SRP_SQ_SIZE;
+       init_attr->cap.max_send_wr     = SRP_SQ_SIZE + SRP_TXP_SIZE;
       init_attr->cap.max_recv_wr     = SRP_RQ_SIZE;
       init_attr->cap.max_recv_sge    = 1;
       init_attr->cap.max_send_sge    = 1;
@@ -272,10 +280,12 @@ static void srp_free_target_ib(struct
srp_target_port *target)
       ib_destroy_qp(target->qp);
       ib_destroy_cq(target->cq);

-       for (i = 0; i < SRP_RQ_SIZE; ++i)
+       for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i)
               srp_free_iu(target->srp_host, target->rx_ring[i]);
-       for (i = 0; i < SRP_SQ_SIZE + 1; ++i)
+       for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i)
               srp_free_iu(target->srp_host, target->tx_ring[i]);
+       for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i)
+               srp_free_iu(target->srp_host, target->txp_ring[i]);
 }

 static void srp_path_rec_completion(int status,
@@ -888,6 +898,14 @@ static void srp_handle_recv(struct
srp_target_port *target, struct ib_wc *wc)
                            PFX "Got target logout request\n");
               break;

+       case SRP_CRED_REQ:
+               srp_process_cred_req(target, iu->buf);
+               break;
+
+       case SRP_AER_REQ:
+               srp_process_aer_req(target, iu->buf);
+               break;
+
       default:
               shost_printk(KERN_WARNING, target->scsi_host,
                            PFX "Unhandled SRP opcode 0x%02x\n", opcode);
@@ -908,7 +926,11 @@ static void srp_completion(struct ib_cq *cq, void
*target_ptr)
               if (wc.status) {
                       shost_printk(KERN_ERR, target->scsi_host,
                                    PFX "failed %s status %d\n",
-                                    wc.wr_id & SRP_OP_RECV ?
"receive" : "send",
+                                    wc.wr_id & SRP_OP_RECV
+                                    ? "receiving"
+                                    : wc.wr_id & SRP_OP_TXP
+                                    ? "sending response"
+                                    : "sending request",
                                    wc.status);
                       target->qp_in_error = 1;
                       break;
@@ -916,6 +938,8 @@ static void srp_completion(struct ib_cq *cq, void
*target_ptr)

               if (wc.wr_id & SRP_OP_RECV)
                       srp_handle_recv(target, &wc);
+               else if (wc.wr_id & SRP_OP_TXP)
+                       ++target->txp_tail;
               else
                       ++target->tx_tail;
       }
@@ -961,15 +985,19 @@ static int srp_post_recv(struct srp_target_port *target)
 }

 /*
+ * Obtain an information unit for sending a request to the target.
+ *
 * Must be called with target->scsi_host->host_lock held to protect
 * req_lim and tx_head.  Lock cannot be dropped between call here and
- * call to __srp_post_send().
+ * call to __srp_post_send_req().
 */
 static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
                                       enum srp_request_type req_type)
 {
       s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2;

+       BUILD_BUG_ON(!IS_POWER_OF_2(ARRAY_SIZE(target->tx_ring)));
+
       if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
               return NULL;

@@ -978,26 +1006,31 @@ static struct srp_iu *__srp_get_tx_iu(struct
srp_target_port *target,
               return NULL;
       }

-       return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
+       return target->tx_ring[target->tx_head
+                              & (ARRAY_SIZE(target->tx_ring) - 1)];
 }

 /*
+ * Send a request to the target.
+ *
 * Must be called with target->scsi_host->host_lock held to protect
 * req_lim and tx_head.
 */
-static int __srp_post_send(struct srp_target_port *target,
-                          struct srp_iu *iu, int len)
+static int __srp_post_send_req(struct srp_target_port *target,
+                              struct srp_iu *iu, int len)
 {
       struct ib_sge list;
       struct ib_send_wr wr, *bad_wr;
       int ret = 0;

+       BUILD_BUG_ON(!IS_POWER_OF_2(ARRAY_SIZE(target->tx_ring)));
+
       list.addr   = iu->dma;
       list.length = len;
       list.lkey   = target->srp_host->srp_dev->mr->lkey;

       wr.next       = NULL;
-       wr.wr_id      = target->tx_head & SRP_SQ_SIZE;
+       wr.wr_id      = target->tx_head & (ARRAY_SIZE(target->tx_ring) - 1);
       wr.sg_list    = &list;
       wr.num_sge    = 1;
       wr.opcode     = IB_WR_SEND;
@@ -1013,6 +1046,117 @@ static int __srp_post_send(struct
srp_target_port *target,
       return ret;
 }

+/*
+ * Obtain an information unit for sending a response to the target.
+ *
+ * Must be called with target->scsi_host->host_lock held to protect
+ * req_lim and tx_head.  Lock cannot be dropped between call here and
+ * call to __srp_post_send_rsp().
+ */
+static struct srp_iu *__srp_get_txp_iu(struct srp_target_port *target)
+{
+       BUILD_BUG_ON(!IS_POWER_OF_2(SRP_TXP_SIZE));
+
+       if (WARN_ON(target->txp_head - target->txp_tail >= SRP_TXP_SIZE))
+               return NULL;
+
+       return target->txp_ring[target->txp_head & (SRP_TXP_SIZE - 1)];
+}
+
+/*
+ * Send a response to a request received from the target.
+ *
+ * Must be called with target->scsi_host->host_lock held to protect txp_head.
+ */
+static int __srp_post_send_rsp(struct srp_target_port *target,
+                              struct srp_iu *iu, int len)
+{
+       struct ib_sge list;
+       struct ib_send_wr wr, *bad_wr;
+       int ret = 0;
+
+       BUILD_BUG_ON(!IS_POWER_OF_2(SRP_TXP_SIZE));
+
+       list.addr   = iu->dma;
+       list.length = len;
+       list.lkey   = target->srp_host->srp_dev->mr->lkey;
+
+       wr.next       = NULL;
+       wr.wr_id      = (target->txp_head & (SRP_TXP_SIZE - 1)) | SRP_OP_TXP;
+       wr.sg_list    = &list;
+       wr.num_sge    = 1;
+       wr.opcode     = IB_WR_SEND;
+       wr.send_flags = IB_SEND_SIGNALED;
+
+       ret = ib_post_send(target->qp, &wr, &bad_wr);
+
+       if (!ret)
+               ++target->txp_head;
+
+       return ret;
+}
+
+static void srp_process_cred_req(struct srp_target_port *target,
+                                struct srp_cred_req *req)
+{
+       struct ib_device *dev;
+       struct srp_iu *iu;
+       struct srp_cred_rsp *rsp;
+       unsigned long flags;
+       int res;
+       s32 delta;
+       u64 tag;
+
+       dev = target->srp_host->srp_dev->dev;
+       delta = (s32) be32_to_cpu(req->req_lim_delta);
+       tag = req->tag;
+
+       spin_lock_irqsave(target->scsi_host->host_lock, flags);
+
+       target->req_lim += delta;
+
+       iu = __srp_get_txp_iu(target);
+       if (!iu)
+               goto out;
+
+       rsp = iu->buf;
+
+       ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof(*rsp),
+                                  DMA_TO_DEVICE);
+
+       memset(rsp, 0, sizeof *rsp);
+       rsp->opcode = SRP_CRED_RSP;
+       rsp->tag    = tag;
+
+       ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
+                                     DMA_TO_DEVICE);
+
+       if (__srp_post_send_rsp(target, iu, sizeof(*rsp)))
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "Sending response failed\n");
+
+       res = __srp_post_recv(target);
+       if (res)
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "ib_post_recv() failed -- res = %d\n", res);
+
+out:
+       spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+}
+
+static void srp_process_aer_req(struct srp_target_port *target,
+                                struct srp_cred_req *req)
+{
+       int res;
+
+       shost_printk(KERN_ERR, target->scsi_host,
+                    PFX "received and ignored SRP_AER_REQ\n");
+       res = srp_post_recv(target);
+       if (res)
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "ib_post_recv() failed -- res = %d\n", res);
+}
+
 static int srp_queuecommand(struct scsi_cmnd *scmnd,
                           void (*done)(struct scsi_cmnd *))
 {
@@ -1075,8 +1219,9 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
       ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
                                     DMA_TO_DEVICE);

-       if (__srp_post_send(target, iu, len)) {
-               shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
+       if (__srp_post_send_req(target, iu, len)) {
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "Sending request failed\n");
               goto err_unmap;
       }

@@ -1095,7 +1240,7 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
 {
       int i;

-       for (i = 0; i < SRP_RQ_SIZE; ++i) {
+       for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i) {
               target->rx_ring[i] = srp_alloc_iu(target->srp_host,
                                                 target->max_ti_iu_len,
                                                 GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1103,7 +1248,7 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
                       goto err;
       }

-       for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+       for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i) {
               target->tx_ring[i] = srp_alloc_iu(target->srp_host,
                                                 srp_max_iu_len,
                                                 GFP_KERNEL, DMA_TO_DEVICE);
@@ -1111,19 +1256,32 @@ static int srp_alloc_iu_bufs(struct
srp_target_port *target)
                       goto err;
       }

+       for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i) {
+               target->txp_ring[i] = srp_alloc_iu(target->srp_host,
+                                                 srp_max_iu_len,
+                                                 GFP_KERNEL, DMA_TO_DEVICE);
+               if (!target->txp_ring[i])
+                       goto err;
+       }
+
       return 0;

 err:
-       for (i = 0; i < SRP_RQ_SIZE; ++i) {
+       for (i = 0; i < ARRAY_SIZE(target->rx_ring); ++i) {
               srp_free_iu(target->srp_host, target->rx_ring[i]);
               target->rx_ring[i] = NULL;
       }

-       for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+       for (i = 0; i < ARRAY_SIZE(target->tx_ring); ++i) {
               srp_free_iu(target->srp_host, target->tx_ring[i]);
               target->tx_ring[i] = NULL;
       }

+       for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i) {
+               srp_free_iu(target->srp_host, target->txp_ring[i]);
+               target->tx_ring[i] = NULL;
+       }
+
       return -ENOMEM;
 }

@@ -1211,6 +1369,7 @@ static int srp_cm_handler(struct ib_cm_id
*cm_id, struct ib_cm_event *event)
 {
       struct srp_target_port *target = cm_id->context;
       struct ib_qp_attr *qp_attr = NULL;
+       int i;
       int attr_mask = 0;
       int comp = 0;
       int opcode = 0;
@@ -1263,7 +1422,11 @@ static int srp_cm_handler(struct ib_cm_id
*cm_id, struct ib_cm_event *event)
               if (target->status)
                       break;

-               target->status = srp_post_recv(target);
+               for (i = 0; i < SRP_RXR_SIZE; ++i) {
+                       target->status = srp_post_recv(target);
+                       if (target->status)
+                               break;
+               }
               if (target->status)
                       break;

@@ -1353,7 +1516,7 @@ static int srp_send_tsk_mgmt(struct
srp_target_port *target,
       tsk_mgmt->tsk_mgmt_func = func;
       tsk_mgmt->task_tag      = req->index;

-       if (__srp_post_send(target, iu, sizeof *tsk_mgmt))
+       if (__srp_post_send_req(target, iu, sizeof *tsk_mgmt))
               goto out;

       req->tsk_mgmt = iu;
@@ -1529,6 +1692,18 @@ static ssize_t show_orig_dgid(struct device *dev,
       return sprintf(buf, "%pI6\n", target->orig_dgid);
 }

+static ssize_t show_req_lim(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+       if (target->state == SRP_TARGET_DEAD ||
+           target->state == SRP_TARGET_REMOVED)
+               return -ENODEV;
+
+       return sprintf(buf, "%d\n", target->req_lim);
+}
+
 static ssize_t show_zero_req_lim(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
@@ -1563,6 +1738,7 @@ static DEVICE_ATTR(service_id,        S_IRUGO,
show_service_id,           NULL);
 static DEVICE_ATTR(pkey,           S_IRUGO, show_pkey,            NULL);
 static DEVICE_ATTR(dgid,           S_IRUGO, show_dgid,            NULL);
 static DEVICE_ATTR(orig_dgid,      S_IRUGO, show_orig_dgid,       NULL);
+static DEVICE_ATTR(req_lim,        S_IRUGO, show_req_lim,         NULL);
 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,
    NULL);
 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
@@ -1574,6 +1750,7 @@ static struct device_attribute *srp_host_attrs[] = {
       &dev_attr_pkey,
       &dev_attr_dgid,
       &dev_attr_orig_dgid,
+       &dev_attr_req_lim,
       &dev_attr_zero_req_lim,
       &dev_attr_local_ib_port,
       &dev_attr_local_ib_device,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h
b/drivers/infiniband/ulp/srp/ib_srp.h
index e185b90..c553a03 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,10 +57,20 @@ enum {
       SRP_MAX_LUN             = 512,
       SRP_DEF_SG_TABLESIZE    = 12,

+       /*
+        * Receive queue: queue for receiving responses and requests from the
+        * target.
+        */
       SRP_RQ_SHIFT            = 6,
       SRP_RQ_SIZE             = 1 << SRP_RQ_SHIFT,
-       SRP_SQ_SIZE             = SRP_RQ_SIZE - 1,
-       SRP_CQ_SIZE             = SRP_SQ_SIZE + SRP_RQ_SIZE,
+       /* Number of receive slots reserved for receiving requests. */
+       SRP_RXR_SIZE            = 2,
+       /* Send queue: queue for sending requests to the target. */
+       SRP_SQ_SIZE             = SRP_RQ_SIZE - SRP_RXR_SIZE - 1,
+       /* Size of the queue for sending responses to the target. */
+       SRP_TXP_SIZE            = 2,
+       /* Completion queue. */
+       SRP_CQ_SIZE             = SRP_SQ_SIZE + SRP_TXP_SIZE + SRP_RQ_SIZE,

       SRP_TAG_TSK_MGMT        = 1 << (SRP_RQ_SHIFT + 1),

@@ -69,6 +79,9 @@ enum {
       SRP_FMR_DIRTY_SIZE      = SRP_FMR_POOL_SIZE / 4
 };

+/* wr_id / wc_id flag for marking responses sent to the target. */
+#define SRP_OP_TXP             (1 << 30)
+/* wr_id / wc_id flag for marking receive operations. */
 #define SRP_OP_RECV            (1 << 31)

 enum srp_target_state {
@@ -141,12 +154,20 @@ struct srp_target_port {

       int                     zero_req_lim;

+
+       /* Receive ring for responses and requests received by the initiator. */
       unsigned                rx_head;
       struct srp_iu          *rx_ring[SRP_RQ_SIZE];

+       /* Transmit ring for requests sent to the target. */
       unsigned                tx_head;
       unsigned                tx_tail;
-       struct srp_iu          *tx_ring[SRP_SQ_SIZE + 1];
+       struct srp_iu          *tx_ring[SRP_SQ_SIZE + SRP_RXR_SIZE + 1];
+
+       /* Transmit ring for responses sent to the target. */
+       unsigned                txp_head;
+       unsigned                txp_tail;
+       struct srp_iu          *txp_ring[SRP_TXP_SIZE];

       struct list_head        free_reqs;
       struct list_head        req_queue;
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index ad178fa..55c16c7 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -239,4 +239,26 @@ struct srp_rsp {
       u8      data[0];
 } __attribute__((packed));

+/*
+ * SRP_CRED_REQ information unit, as defined in section 6.10 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_req {
+       u8      opcode;
+       u8      sol_not;
+       u8      reserved[2];
+       __be32  req_lim_delta;
+       u64     tag;
+} __attribute__((packed));
+
+/*
+ * SRP_CRED_RSP information unit, as defined in section 6.11 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_rsp {
+       u8      opcode;
+       u8      reserved[7];
+       u64     tag;
+} __attribute__((packed));
+
 #endif /* SCSI_SRP_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]     ` <f3177b9e1001020805k4dce1991u3733c5a7f6d46aaa-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-02 17:52       ` Bart Van Assche
  0 siblings, 0 replies; 35+ messages in thread
From: Bart Van Assche @ 2010-01-02 17:52 UTC (permalink / raw)
  To: Chris Worley; +Cc: ofw-ZwoEplunGu1OwGhvXhtEPSCwEArCW2h5, OFED mailing list

On Sat, Jan 2, 2010 at 5:05 PM, Chris Worley <worleys-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> Would this fix effect the Window's SRP initiator too?

I'm not familiar with the WinOF codebase. But at first sight it looks
like the WinOF SRP initiator doesn't support processing SRP_CRED_REQ
information units either, which means that the WinOF SRP initiator is
vulnerable to the same initiator lockup issue as the Linux SRP
initiator.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found] ` <e2e108261001020419l36319156hb9d625edc2e15d06-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-02 16:05   ` Fwd: " Chris Worley
@ 2010-01-04  1:34   ` David Dillow
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
  2010-01-06 21:37   ` Roland Dreier
  2 siblings, 1 reply; 35+ messages in thread
From: David Dillow @ 2010-01-04  1:34 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Roland Dreier, Chris Worley

On Sat, 2010-01-02 at 13:19 +0100, Bart Van Assche wrote:
> This patch fixes this issue by adding support for SRP_CRED_REQ information
> units in the SRP initiator. Additionally, this patch makes the per-session
> variable req_lim visible through sysfs, which makes observing the initiator
> state easier.

I agree that we should add support for SRP_CRED_REQ, but I'm not
thrilled with the mix of changes in the patch, as well as the general
aesthetics of the result. How about something like the following series
-- posted as a follow up to this message -- with proper credit for Bart?
I'll sign off on them once we're happy with a direction and Bart acks.

Also, these are all compile tested only, so they need some testing. I
don't have anything that uses these messages, so some help would be
appreciated.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [RFC PATCH 1/3] IB/srp: differentiate the uses of SRP_SQ_SIZE
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
@ 2010-01-04  1:43       ` David Dillow
  2010-01-04  1:43       ` [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ David Dillow
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 35+ messages in thread
From: David Dillow @ 2010-01-04  1:43 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Roland Dreier, Chris Worley,
	David Dillow

SRP_SQ_SIZE was used in many places that needed to know about the
real size of the data structures, and needed to use a magic offset.
Additionally, the meaning of the value was different depending on
where it was used. Prepare for future work by naming the use
appropriately for the site.

Also, document the assumptions about the ring sizes being a power of 2.
---
The BUILD_BUG_ON() needs to be reworked, once a version of is_power_of_2
that works at compile time is committed upstream. This version is a little
obfuscated for my taste, but I don't want to define a local macro for it
either.

 drivers/infiniband/ulp/srp/ib_srp.c |   24 ++++++++++++++----------
 drivers/infiniband/ulp/srp/ib_srp.h |   10 +++++++---
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 54c8fe2..82e7f9d 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -274,7 +274,7 @@ static void srp_free_target_ib(struct srp_target_port *target)
 
 	for (i = 0; i < SRP_RQ_SIZE; ++i)
 		srp_free_iu(target->srp_host, target->rx_ring[i]);
-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i)
+	for (i = 0; i < SRP_SQ_SIZE; ++i)
 		srp_free_iu(target->srp_host, target->tx_ring[i]);
 }
 
@@ -970,7 +970,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
 {
 	s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2;
 
-	if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
+	if (target->tx_head - target->tx_tail >= SRP_SQ_FULL)
 		return NULL;
 
 	if (target->req_lim < min) {
@@ -978,7 +978,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
 		return NULL;
 	}
 
-	return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
+	return target->tx_ring[target->tx_head & SRP_SQ_MASK];
 }
 
 /*
@@ -997,7 +997,7 @@ static int __srp_post_send(struct srp_target_port *target,
 	list.lkey   = target->srp_host->srp_dev->mr->lkey;
 
 	wr.next       = NULL;
-	wr.wr_id      = target->tx_head & SRP_SQ_SIZE;
+	wr.wr_id      = target->tx_head & SRP_SQ_MASK;
 	wr.sg_list    = &list;
 	wr.num_sge    = 1;
 	wr.opcode     = IB_WR_SEND;
@@ -1103,7 +1103,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
 			goto err;
 	}
 
-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+	for (i = 0; i < SRP_SQ_SIZE; ++i) {
 		target->tx_ring[i] = srp_alloc_iu(target->srp_host,
 						  srp_max_iu_len,
 						  GFP_KERNEL, DMA_TO_DEVICE);
@@ -1119,7 +1119,7 @@ err:
 		target->rx_ring[i] = NULL;
 	}
 
-	for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+	for (i = 0; i < SRP_SQ_SIZE; ++i) {
 		srp_free_iu(target->srp_host, target->tx_ring[i]);
 		target->tx_ring[i] = NULL;
 	}
@@ -1589,9 +1589,9 @@ static struct scsi_host_template srp_template = {
 	.eh_abort_handler		= srp_abort,
 	.eh_device_reset_handler	= srp_reset_device,
 	.eh_host_reset_handler		= srp_reset_host,
-	.can_queue			= SRP_SQ_SIZE,
+	.can_queue			= SRP_MAX_CREDIT,
 	.this_id			= -1,
-	.cmd_per_lun			= SRP_SQ_SIZE,
+	.cmd_per_lun			= SRP_MAX_CREDIT,
 	.use_clustering			= ENABLE_CLUSTERING,
 	.shost_attrs			= srp_host_attrs
 };
@@ -1776,7 +1776,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 				printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p);
 				goto out;
 			}
-			target->scsi_host->cmd_per_lun = min(token, SRP_SQ_SIZE);
+			target->scsi_host->cmd_per_lun = min(token, SRP_MAX_CREDIT);
 			break;
 
 		case SRP_OPT_IO_CLASS:
@@ -1854,7 +1854,7 @@ static ssize_t srp_create_target(struct device *dev,
 
 	INIT_LIST_HEAD(&target->free_reqs);
 	INIT_LIST_HEAD(&target->req_queue);
-	for (i = 0; i < SRP_SQ_SIZE; ++i) {
+	for (i = 0; i < SRP_MAX_CREDIT; ++i) {
 		target->req_ring[i].index = i;
 		list_add_tail(&target->req_ring[i].list, &target->free_reqs);
 	}
@@ -2122,6 +2122,10 @@ static int __init srp_init_module(void)
 {
 	int ret;
 
+	/* This code assumes that the ring sizes are powers of two */
+	BUILD_BUG_ON(SRP_RQ_SIZE != (1UL << ilog2(SRP_RQ_SIZE)));
+	BUILD_BUG_ON(SRP_SQ_SIZE != (1UL << ilog2(SRP_SQ_SIZE)));
+
 	if (srp_sg_tablesize > 255) {
 		printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n");
 		srp_sg_tablesize = 255;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e185b90..2742969 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -59,9 +59,13 @@ enum {
 
 	SRP_RQ_SHIFT    	= 6,
 	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
-	SRP_SQ_SIZE		= SRP_RQ_SIZE - 1,
+	SRP_SQ_SIZE		= SRP_RQ_SIZE,
 	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_RQ_SIZE,
 
+	SRP_SQ_FULL		= SRP_SQ_SIZE - 1,
+	SRP_SQ_MASK		= SRP_SQ_SIZE - 1,
+	SRP_MAX_CREDIT		= SRP_SQ_SIZE - 1,
+
 	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1),
 
 	SRP_FMR_SIZE		= 256,
@@ -146,11 +150,11 @@ struct srp_target_port {
 
 	unsigned		tx_head;
 	unsigned		tx_tail;
-	struct srp_iu	       *tx_ring[SRP_SQ_SIZE + 1];
+	struct srp_iu	       *tx_ring[SRP_SQ_SIZE];
 
 	struct list_head	free_reqs;
 	struct list_head	req_queue;
-	struct srp_request	req_ring[SRP_SQ_SIZE];
+	struct srp_request	req_ring[SRP_MAX_CREDIT];
 
 	struct work_struct	work;
 
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
  2010-01-04  1:43       ` [RFC PATCH 1/3] IB/srp: differentiate the uses of SRP_SQ_SIZE David Dillow
@ 2010-01-04  1:43       ` David Dillow
       [not found]         ` <1262569417-20341-2-git-send-email-dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org>
  2010-01-04  1:43       ` [RFC PATCH 3/3] IB/srp: export req_limit via sysfs David Dillow
                         ` (2 subsequent siblings)
  4 siblings, 1 reply; 35+ messages in thread
From: David Dillow @ 2010-01-04  1:43 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Roland Dreier, Chris Worley,
	David Dillow

This patch adds support for SRP_CRED_REQ to avoid a lockup by targets
that use that mechanism to return credits to the initiator. This
prevents a lockup observed in the field where we would never add the
credits from the SRP_CRED_REQ to our current count, and would therefore
never send another command to the target.

Minimal support for SRP_AER_REQ is also added, as these messages can
also be used to convey additional credits to the initiator.

This should fix http://bugzilla.kernel.org/show_bug.cgi?id=14235

Reported-by: Chris Worley <worleys-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c |   95 ++++++++++++++++++++++++++++++++---
 drivers/infiniband/ulp/srp/ib_srp.h |    8 +++-
 include/scsi/srp.h                  |   49 ++++++++++++++++++
 3 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 82e7f9d..c0a5ccb 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -82,6 +82,8 @@ static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_completion(struct ib_cq *cq, void *target_ptr);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static int srp_response_common(struct srp_target_port *target, s32 req_delta,
+			       void *rsp, int len);
 
 static struct scsi_transport_template *ib_srp_transport_template;
 
@@ -845,6 +847,37 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
 	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
 }
 
+static void srp_process_cred_req(struct srp_target_port *target,
+				 struct srp_cred_req *req)
+{
+	struct srp_cred_rsp rsp = {
+		.opcode = SRP_CRED_RSP,
+		.tag = req->tag,
+	};
+	s32 delta = be32_to_cpu(req->req_lim_delta);
+
+	if (srp_response_common(target, delta, &rsp, sizeof rsp))
+		shost_printk(KERN_ERR, target->scsi_host, PFX
+			     "problems processing SRP_CRED_REQ\n");
+}
+
+static void srp_process_aer_req(struct srp_target_port *target,
+				 struct srp_aer_req *req)
+{
+	struct srp_aer_rsp rsp = {
+		.opcode = SRP_AER_RSP,
+		.tag = req->tag,
+	};
+	s32 delta = be32_to_cpu(req->req_lim_delta);
+
+	shost_printk(KERN_ERR, target->scsi_host, PFX
+		     "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
+
+	if (srp_response_common(target, delta, &rsp, sizeof rsp))
+		shost_printk(KERN_ERR, target->scsi_host, PFX
+			     "problems processing SRP_AER_REQ\n");
+}
+
 static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 {
 	struct ib_device *dev;
@@ -888,6 +921,14 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 			     PFX "Got target logout request\n");
 		break;
 
+	case SRP_CRED_REQ:
+		srp_process_cred_req(target, iu->buf);
+		break;
+
+	case SRP_AER_REQ:
+		srp_process_aer_req(target, iu->buf);
+		break;
+
 	default:
 		shost_printk(KERN_WARNING, target->scsi_host,
 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
@@ -973,7 +1014,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
 	if (target->tx_head - target->tx_tail >= SRP_SQ_FULL)
 		return NULL;
 
-	if (target->req_lim < min) {
+	/* SRP responses do not consume credits */
+	if (req_type != SRP_REQ_RESPONSE && target->req_lim < min) {
 		++target->zero_req_lim;
 		return NULL;
 	}
@@ -986,7 +1028,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
  * req_lim and tx_head.
  */
 static int __srp_post_send(struct srp_target_port *target,
-			   struct srp_iu *iu, int len)
+			   struct srp_iu *iu, int len, int credits)
 {
 	struct ib_sge list;
 	struct ib_send_wr wr, *bad_wr;
@@ -1007,7 +1049,7 @@ static int __srp_post_send(struct srp_target_port *target,
 
 	if (!ret) {
 		++target->tx_head;
-		--target->req_lim;
+		target->req_lim -= credits;
 	}
 
 	return ret;
@@ -1075,7 +1117,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
 	ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
 				      DMA_TO_DEVICE);
 
-	if (__srp_post_send(target, iu, len)) {
+	if (__srp_post_send(target, iu, len, 1)) {
 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
 		goto err_unmap;
 	}
@@ -1091,6 +1133,40 @@ err:
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
+static int srp_response_common(struct srp_target_port *target, s32 req_delta,
+			       void *rsp, int len)
+{
+	struct ib_device *dev;
+	unsigned long flags;
+	struct srp_iu *iu;
+	int err = 1;
+
+	dev = target->srp_host->srp_dev->dev;
+
+	spin_lock_irqsave(target->scsi_host->host_lock, flags);
+	target->req_lim += req_delta;
+
+	iu = __srp_get_tx_iu(target, SRP_REQ_RESPONSE);
+	if (!iu) {
+		shost_printk(KERN_ERR, target->scsi_host, PFX
+			     "no IU available to send response\n");
+		goto out;
+	}
+
+	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
+	memcpy(iu->buf, rsp, len);
+	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
+
+	err = __srp_post_send(target, iu, len, 0);
+	if (err)
+		shost_printk(KERN_ERR, target->scsi_host, PFX
+			     "unable to post response: %d\n", err);
+
+out:
+	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+	return err;
+}
+
 static int srp_alloc_iu_bufs(struct srp_target_port *target)
 {
 	int i;
@@ -1214,6 +1290,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 	int attr_mask = 0;
 	int comp = 0;
 	int opcode = 0;
+	int i;
 
 	switch (event->event) {
 	case IB_CM_REQ_ERROR:
@@ -1263,9 +1340,11 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 		if (target->status)
 			break;
 
-		target->status = srp_post_recv(target);
-		if (target->status)
-			break;
+		for (i = 0; i < SRP_RESP_RESV; ++i) {
+			target->status = srp_post_recv(target);
+			if (target->status)
+				break;
+		}
 
 		qp_attr->qp_state = IB_QPS_RTS;
 		target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
@@ -1353,7 +1432,7 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 	tsk_mgmt->tsk_mgmt_func = func;
 	tsk_mgmt->task_tag 	= req->index;
 
-	if (__srp_post_send(target, iu, sizeof *tsk_mgmt))
+	if (__srp_post_send(target, iu, sizeof *tsk_mgmt, 1))
 		goto out;
 
 	req->tsk_mgmt = iu;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 2742969..2ae3a8f 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,14 +57,19 @@ enum {
 	SRP_MAX_LUN		= 512,
 	SRP_DEF_SG_TABLESIZE	= 12,
 
+	/* SRP_RQ_SHIFT sets the physical size of the TX/RX queues */
 	SRP_RQ_SHIFT    	= 6,
 	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
 	SRP_SQ_SIZE		= SRP_RQ_SIZE,
 	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_RQ_SIZE,
 
+	/* SRP_RESP_RESV sets the number of queue entries reserved for
+	 * unsolicited messages from the target and the responses to them.
+	 */
+	SRP_RESP_RESV		= 2,
 	SRP_SQ_FULL		= SRP_SQ_SIZE - 1,
 	SRP_SQ_MASK		= SRP_SQ_SIZE - 1,
-	SRP_MAX_CREDIT		= SRP_SQ_SIZE - 1,
+	SRP_MAX_CREDIT		= SRP_SQ_SIZE - SRP_RESP_RESV - 1,
 
 	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1),
 
@@ -85,6 +90,7 @@ enum srp_target_state {
 enum srp_request_type {
 	SRP_REQ_NORMAL,
 	SRP_REQ_TASK_MGMT,
+	SRP_REQ_RESPONSE,
 };
 
 struct srp_device {
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index ad178fa..ae7dcfd 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -239,4 +239,53 @@ struct srp_rsp {
 	u8	data[0];
 } __attribute__((packed));
 
+/*
+ * SRP_CRED_REQ information unit, as defined in section 6.10 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_req {
+	u8	opcode;
+	u8	sol_not;
+	u8	reserved[2];
+	__be32	req_lim_delta;
+	u64	tag;
+} __attribute__((packed));
+
+/*
+ * SRP_CRED_RSP information unit, as defined in section 6.11 of the
+ * T10 SRP r16a document.
+ */
+struct srp_cred_rsp {
+	u8	opcode;
+	u8	reserved[7];
+	u64	tag;
+} __attribute__((packed));
+
+/*
+ * SRP_AER_REQ information unit, as defined in section 6.12 of the
+ * T10 SRP r16a document.
+ */
+struct srp_aer_req {
+	u8	opcode;
+	u8	sol_not;
+	u8	reserved[2];
+	__be32	req_lim_delta;
+	u64	tag;
+	u32	reserved2;
+	__be64	lun;
+	__be32	sense_data_len;
+	u32	reserved3;
+	u8	sense_data[0];
+} __attribute__((packed));
+
+/*
+ * SRP_AER_RSP information unit, as defined in section 6.13 of the
+ * T10 SRP r16a document.
+ */
+struct srp_aer_rsp {
+	u8	opcode;
+	u8	reserved[7];
+	u64	tag;
+} __attribute__((packed));
+
 #endif /* SCSI_SRP_H */
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [RFC PATCH 3/3] IB/srp: export req_limit via sysfs
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
  2010-01-04  1:43       ` [RFC PATCH 1/3] IB/srp: differentiate the uses of SRP_SQ_SIZE David Dillow
  2010-01-04  1:43       ` [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ David Dillow
@ 2010-01-04  1:43       ` David Dillow
  2010-01-04  7:13       ` [PATCH] IB/srp: Fix initiator lockup Bart Van Assche
  2010-01-06 21:38       ` Roland Dreier
  4 siblings, 0 replies; 35+ messages in thread
From: David Dillow @ 2010-01-04  1:43 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Roland Dreier, Chris Worley,
	David Dillow

Export the target's current request limit, in case it is useful for
debugging.
---
I don't feel strongly that this should go in, only that it should be
a separate patch if it does.

 drivers/infiniband/ulp/srp/ib_srp.c |   14 ++++++++++++++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c0a5ccb..c68d8d6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1620,6 +1620,18 @@ static ssize_t show_zero_req_lim(struct device *dev,
 	return sprintf(buf, "%d\n", target->zero_req_lim);
 }
 
+static ssize_t show_req_lim(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", target->req_lim);
+}
+
 static ssize_t show_local_ib_port(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -1642,6 +1654,7 @@ static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
+static DEVICE_ATTR(req_lim,	    S_IRUGO, show_req_lim,	   NULL);
 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
@@ -1653,6 +1666,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_pkey,
 	&dev_attr_dgid,
 	&dev_attr_orig_dgid,
+	&dev_attr_req_lim,
 	&dev_attr_zero_req_lim,
 	&dev_attr_local_ib_port,
 	&dev_attr_local_ib_device,
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
                         ` (2 preceding siblings ...)
  2010-01-04  1:43       ` [RFC PATCH 3/3] IB/srp: export req_limit via sysfs David Dillow
@ 2010-01-04  7:13       ` Bart Van Assche
       [not found]         ` <e2e108261001032313v472d4b9dm75c3d0b1a9268adc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-06 21:38       ` Roland Dreier
  4 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-04  7:13 UTC (permalink / raw)
  To: David Dillow; +Cc: OFED mailing list, Roland Dreier, Chris Worley

On Mon, Jan 4, 2010 at 2:34 AM, David Dillow <dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org> wrote:
>
> On Sat, 2010-01-02 at 13:19 +0100, Bart Van Assche wrote:
> > This patch fixes this issue by adding support for SRP_CRED_REQ information
> > units in the SRP initiator. Additionally, this patch makes the per-session
> > variable req_lim visible through sysfs, which makes observing the initiator
> > state easier.
>
> I agree that we should add support for SRP_CRED_REQ, but I'm not
> thrilled with the mix of changes in the patch, as well as the general
> aesthetics of the result. How about something like the following series
> -- posted as a follow up to this message -- with proper credit for Bart?
> I'll sign off on them once we're happy with a direction and Bart acks.
>
> Also, these are all compile tested only, so they need some testing. I
> don't have anything that uses these messages, so some help would be
> appreciated.

Is that regular kernel coding practice, to run away with the work
someone else did and to claim authorship ? As far as I know this is
considered as impolite.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]         ` <e2e108261001032313v472d4b9dm75c3d0b1a9268adc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-04  8:12           ` David Dillow
       [not found]             ` <1262592761.13289.13.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
  2010-01-06 21:40           ` Roland Dreier
  1 sibling, 1 reply; 35+ messages in thread
From: David Dillow @ 2010-01-04  8:12 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Roland Dreier, Chris Worley

On Mon, 2010-01-04 at 08:13 +0100, Bart Van Assche wrote:
> On Mon, Jan 4, 2010 at 2:34 AM, David Dillow <dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org> wrote:
> > I agree that we should add support for SRP_CRED_REQ, but I'm not
> > thrilled with the mix of changes in the patch, as well as the general
> > aesthetics of the result. How about something like the following series
> > -- posted as a follow up to this message -- with proper credit for Bart?
> > I'll sign off on them once we're happy with a direction and Bart acks.
> >
> > Also, these are all compile tested only, so they need some testing. I
> > don't have anything that uses these messages, so some help would be
> > appreciated.
> 
> Is that regular kernel coding practice, to run away with the work
> someone else did and to claim authorship ? As far as I know this is
> considered as impolite.

At no time did I claim authorship. If you re-read what I wrote, I
explicitly listed proper credit to you as an outstanding issue.

I didn't like the way the code looked after your patch, and the three
separate changes mashed up into one patch. So I took time out of my
afternoon to break them up to better fit my interpretation of kernel
coding practice. I don't really care who's name they go in under.

Do you have any technical comments?

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]             ` <1262592761.13289.13.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
@ 2010-01-04  8:32               ` Bart Van Assche
  0 siblings, 0 replies; 35+ messages in thread
From: Bart Van Assche @ 2010-01-04  8:32 UTC (permalink / raw)
  To: David Dillow; +Cc: OFED mailing list, Roland Dreier, Chris Worley

On Mon, Jan 4, 2010 at 9:12 AM, David Dillow <dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org> wrote:
>
> On Mon, 2010-01-04 at 08:13 +0100, Bart Van Assche wrote:
> > On Mon, Jan 4, 2010 at 2:34 AM, David Dillow <dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org> wrote:
> > > I agree that we should add support for SRP_CRED_REQ, but I'm not
> > > thrilled with the mix of changes in the patch, as well as the general
> > > aesthetics of the result. How about something like the following series
> > > -- posted as a follow up to this message -- with proper credit for Bart?
> > > I'll sign off on them once we're happy with a direction and Bart acks.
> > >
> > > Also, these are all compile tested only, so they need some testing. I
> > > don't have anything that uses these messages, so some help would be
> > > appreciated.
> >
> > Is that regular kernel coding practice, to run away with the work
> > someone else did and to claim authorship ? As far as I know this is
> > considered as impolite.
>
> At no time did I claim authorship. If you re-read what I wrote, I
> explicitly listed proper credit to you as an outstanding issue.
>
> I didn't like the way the code looked after your patch, and the three
> separate changes mashed up into one patch. So I took time out of my
> afternoon to break them up to better fit my interpretation of kernel
> coding practice. I don't really care who's name they go in under.
>
> Do you have any technical comments?

I will wait for more review comments on my patch and will post a second
version of my patch. Thanks for the review and the feedback.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found] ` <e2e108261001020419l36319156hb9d625edc2e15d06-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-02 16:05   ` Fwd: " Chris Worley
  2010-01-04  1:34   ` David Dillow
@ 2010-01-06 21:37   ` Roland Dreier
       [not found]     ` <ada4omyud7c.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  2 siblings, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-06 21:37 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley

 > When the SRP initiator is communicating with an SRP target under load it can
 > happen that the SRP initiator locks up. The communication pattern that causes
 > the lockup is as follows:
 > * SRP initiator sends (req_lim - 2) SRP_CMD requests to the target.
 > * The REQUEST LIMIT DELTA field of each SRP_RSP response is zero.
 > * The target sends an SRP_CRED_REQ information unit with non-zero REQUEST
 >   LIMIT DELTA.
 > 
 > The above communication pattern brings the initiator in the following state:
 > * srp_queuecommand() always returns SCSI_MLQUEUE_HOST_BUSY.
 > * The per-session variable zero_req_lim keeps increasing.
 > The initiator never leaves this state because it ignores SRP_CRED_REQ
 > information units.

This is all a bit obfuscated.  The problem is that the initiator runs
out of credits and stops sending commands; because we don't process
SRP_CRED_REQ messages from the target, we never get more credits.

I'm wondering why this took so long to come up?  Does SCST send
SRP_CRED_REQ only under unusual circumstances?  Also I'm wondering why
the "Unhandled SRP opcode" message didn't show up in the kernel log and
help debug this?

Some specific comments:

 > +/* Similar to is_power_of_2(), but can be evaluated at compile time. */
 > +#define IS_POWER_OF_2(n) ((n) != 0 && (((n) & ((n) - 1)) == 0))

I don't think this level of ugliness is really required -- we can just
document carefully at the definition that we need things to be powers of 2.

 > +	for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i)
 > +		srp_free_iu(target->srp_host, target->txp_ring[i]);

Not sure I understand why we need two TX rings -- why can't we just have
one bigger TX ring that handles both requests and responses?

 > + * Obtain an information unit for sending a request to the target.

I think there's a bit of overcommenting in a few places.  Does it really
help anyone to repeat that what "get_tx_iu" does is "get an information
unit for sending"?

 > -	return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
 > +	return target->tx_ring[target->tx_head
 > +			       & (ARRAY_SIZE(target->tx_ring) - 1)];

is this an improvement?

 > + * Send a request to the target.
 > +static int __srp_post_send_req(struct srp_target_port *target,

same -- does the comment add anything?

 > +	/* Completion queue. */
 > +	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_TXP_SIZE + SRP_RQ_SIZE,

etc... all these comments are mostly taking up vertical space and
visually jarring, without adding much info.

 > +/*
 > + * SRP_CRED_REQ information unit, as defined in section 6.10 of the
 > + * T10 SRP r16a document.
 > + */
 > +struct srp_cred_req {
 > +	u8	opcode;
 > +	u8	sol_not;
 > +	u8	reserved[2];
 > +	__be32	req_lim_delta;
 > +	u64	tag;
 > +} __attribute__((packed));

again... does it help anyone to say "struct srp_cred_req" corresponds to
SRP_CRED_REQ?  <scsi/srp.h> already refers to the SRP document, and I
would think anyone looking up SRP_CRED_REQ could find it faster by
looking for the string itself rather than by section number.

The existing comments in the file are actually useful -- they explain
why some structures need to be packed, and as far as I can tell neither
srp_cred_req nor srp_cred_rsp need to be packed.

 - R.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
                         ` (3 preceding siblings ...)
  2010-01-04  7:13       ` [PATCH] IB/srp: Fix initiator lockup Bart Van Assche
@ 2010-01-06 21:38       ` Roland Dreier
  4 siblings, 0 replies; 35+ messages in thread
From: Roland Dreier @ 2010-01-06 21:38 UTC (permalink / raw)
  To: David Dillow; +Cc: Bart Van Assche, OFED mailing list, Chris Worley


 > I agree that we should add support for SRP_CRED_REQ, but I'm not
 > thrilled with the mix of changes in the patch, as well as the general
 > aesthetics of the result. How about something like the following series
 > -- posted as a follow up to this message -- with proper credit for Bart?
 > I'll sign off on them once we're happy with a direction and Bart acks.

Thanks very much for doing this!  I agree that splitting up this way
makes the patches much easier to review, and is the right way to do this.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]         ` <e2e108261001032313v472d4b9dm75c3d0b1a9268adc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-04  8:12           ` David Dillow
@ 2010-01-06 21:40           ` Roland Dreier
       [not found]             ` <adavdfesyi4.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-06 21:40 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: David Dillow, OFED mailing list, Chris Worley


 > Is that regular kernel coding practice, to run away with the work
 > someone else did and to claim authorship ? As far as I know this is
 > considered as impolite.

I don't see "with proper credit for Bart" as claiming authorship.  And
yes, I think proposing a better way of doing things is definitely
regular kernel coding practice.  It's more than polite -- it's going way
beyond helpful review comments and actually helping revise the patches.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ
       [not found]         ` <1262569417-20341-2-git-send-email-dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org>
@ 2010-01-06 21:48           ` Roland Dreier
       [not found]             ` <adar5q2sy5q.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-06 21:48 UTC (permalink / raw)
  To: David Dillow
  Cc: Bart Van Assche, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Chris Worley

Looks pretty good; a few minor comments:

 > +static int srp_response_common(struct srp_target_port *target, s32 req_delta,
 > +			       void *rsp, int len);

Didn't check -- can we reorder things to avoid this forward declaration?

 > +	shost_printk(KERN_ERR, target->scsi_host, PFX
 > +		     "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));

I wonder if we should dump the sense data here, while we're at it.  Do
any targets even send this?

 >  static int __srp_post_send(struct srp_target_port *target,
 > -			   struct srp_iu *iu, int len)
 > +			   struct srp_iu *iu, int len, int credits)

I wonder if this would be slightly clearer/safer if we made the
parameter "bool consume_credit" or something like that?  Since we should
never pass in anything except 0 or 1, right?

 > +	/* SRP_RESP_RESV sets the number of queue entries reserved for
 > +	 * unsolicited messages from the target and the responses to them.
 > +	 */
 > +	SRP_RESP_RESV		= 2,

Looking at this -- where does the value 2 come from?  We currently
always keep one receive posted, right?  I guess the issue is that we
might get a CRED_REQ and a logout request or something like that, but
that should work OK if we process them one at a time.

Or is this change important too?

 > +} __attribute__((packed));

I don't think any of the new structures need to be packed actually.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ
       [not found]             ` <adar5q2sy5q.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-06 23:02               ` David Dillow
       [not found]                 ` <1262818979.2265.13.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: David Dillow @ 2010-01-06 23:02 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Bart Van Assche, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Chris Worley

On Wed, 2010-01-06 at 13:48 -0800, Roland Dreier wrote:
> Looks pretty good; a few minor comments:
> 
>  > +static int srp_response_common(struct srp_target_port *target, s32 req_delta,
>  > +			       void *rsp, int len);
> 
> Didn't check -- can we reorder things to avoid this forward declaration?

I don't think we can without a lot of code heft. We'd need to pull
__srp_post_send and _get_tx_iu up. I ordered it the way I did to avoid
having two declarations instead of the one.

I'll take another look, perhaps it can be done.

>  > +	shost_printk(KERN_ERR, target->scsi_host, PFX
>  > +		     "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
> 
> I wonder if we should dump the sense data here, while we're at it.  Do
> any targets even send this?

I've never seen it in the field, but I've only got two HW vendors worth
to look at.

Is there a good function to dump SCSI sense data? If so, then it makes
sense to dump it, yeah. Better yet, inject it into the SCSI mid-layer.

>  >  static int __srp_post_send(struct srp_target_port *target,
>  > -			   struct srp_iu *iu, int len)
>  > +			   struct srp_iu *iu, int len, int credits)
> 
> I wonder if this would be slightly clearer/safer if we made the
> parameter "bool consume_credit" or something like that?  Since we should
> never pass in anything except 0 or 1, right?

Makes sense, or we could pass in the request type and do

if (reqtype != SRP_REQ_RESPONSE)
	--req_limit;

>  > +	/* SRP_RESP_RESV sets the number of queue entries reserved for
>  > +	 * unsolicited messages from the target and the responses to them.
>  > +	 */
>  > +	SRP_RESP_RESV		= 2,
> 
> Looking at this -- where does the value 2 come from?  We currently
> always keep one receive posted, right?  I guess the issue is that we
> might get a CRED_REQ and a logout request or something like that, but
> that should work OK if we process them one at a time.
> 
> Or is this change important too?

I think it should be adjusted, probably in the first patch, to make
things a bit more clear -- the way things are in the patch, we set
SRP_RESP_RESV to 2, but really reserve 3 RX entries (and post 2 --
BUG!). It should be at least one to always keep a reply posted, but I
worried about handling multiple messages without room on the TX ring. I
wanted us to have more room to avoid a drop, but I'll admit I haven't
thought it completely through.

> 
>  > +} __attribute__((packed));
> 
> I don't think any of the new structures need to be packed actually.

Only srp_aer_req needs packing I think -- u32 reserved on an 8 byte
boundary followed by __be64 lun.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ
       [not found]                 ` <1262818979.2265.13.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
@ 2010-01-06 23:20                   ` Roland Dreier
  0 siblings, 0 replies; 35+ messages in thread
From: Roland Dreier @ 2010-01-06 23:20 UTC (permalink / raw)
  To: David Dillow
  Cc: Bart Van Assche, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Chris Worley


 > Is there a good function to dump SCSI sense data? If so, then it makes
 > sense to dump it, yeah. Better yet, inject it into the SCSI mid-layer.

A quick look doesn't find any sense-dumping function.  Probably not
worth worrying about it until someone starts seeing async errors anyway.

 > Only srp_aer_req needs packing I think -- u32 reserved on an 8 byte
 > boundary followed by __be64 lun.

Yup, correct on that -- I missed that one.

 > Makes sense, or we could pass in the request type and do
 > 
 > if (reqtype != SRP_REQ_RESPONSE)
 > 	--req_limit;

Works as well ... even stash away the request type when allocating a tx
iu, so we don't have to pass around the type more.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]     ` <ada4omyud7c.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-07  7:52       ` Bart Van Assche
       [not found]         ` <e2e108261001062352l1925f4e1i24bdbdde08056c4c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-07  7:52 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Wed, Jan 6, 2010 at 10:37 PM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > When the SRP initiator is communicating with an SRP target under load it can
>  > happen that the SRP initiator locks up. The communication pattern that causes
>  > the lockup is as follows:
>  > * SRP initiator sends (req_lim - 2) SRP_CMD requests to the target.
>  > * The REQUEST LIMIT DELTA field of each SRP_RSP response is zero.
>  > * The target sends an SRP_CRED_REQ information unit with non-zero REQUEST
>  >   LIMIT DELTA.
>  >
>  > The above communication pattern brings the initiator in the following state:
>  > * srp_queuecommand() always returns SCSI_MLQUEUE_HOST_BUSY.
>  > * The per-session variable zero_req_lim keeps increasing.
>  > The initiator never leaves this state because it ignores SRP_CRED_REQ
>  > information units.
>
> This is all a bit obfuscated.  The problem is that the initiator runs
> out of credits and stops sending commands; because we don't process
> SRP_CRED_REQ messages from the target, we never get more credits.
>
> I'm wondering why this took so long to come up?  Does SCST send
> SRP_CRED_REQ only under unusual circumstances?  Also I'm wondering why
> the "Unhandled SRP opcode" message didn't show up in the kernel log and
> help debug this?

It was only recently that I had the time to analyze this issue in
depth and that I added SRP_CRED_REQ support in SCST (checked in that
support about one week ago). Before SRP_CRED_REQ support was added in
SCST, the SRP initiator could not log the "Unhandled SRP opcode"
message because it did not receive any unhandled SRP opcodes.

>  > +    for (i = 0; i < ARRAY_SIZE(target->txp_ring); ++i)
>  > +            srp_free_iu(target->srp_host, target->txp_ring[i]);
>
> Not sure I understand why we need two TX rings -- why can't we just have
> one bigger TX ring that handles both requests and responses?

At the time I added the second TX ring it wasn't yet clear to me that
one single TX ring would work too. I'll merge both TX rings.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]         ` <e2e108261001062352l1925f4e1i24bdbdde08056c4c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-07  7:57           ` Roland Dreier
       [not found]             ` <adak4vuqrcw.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-07  7:57 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley


 > It was only recently that I had the time to analyze this issue in
 > depth and that I added SRP_CRED_REQ support in SCST (checked in that
 > support about one week ago). Before SRP_CRED_REQ support was added in
 > SCST, the SRP initiator could not log the "Unhandled SRP opcode"
 > message because it did not receive any unhandled SRP opcodes.

So it sounds like this is fixing something that was not the original
bug?  Because SCST wasn't sending SRP_CRED_REQ when the issue of the
initiator hanging was originally seen, if I understand correctly.

Do you understand the root cause of the original bug?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]             ` <adavdfesyi4.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-07  7:59               ` Bart Van Assche
       [not found]                 ` <e2e108261001062359l66384219r650294b4d6e9f3f1-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-07  7:59 UTC (permalink / raw)
  To: Roland Dreier; +Cc: David Dillow, OFED mailing list, Chris Worley

On Wed, Jan 6, 2010 at 10:40 PM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > Is that regular kernel coding practice, to run away with the work
>  > someone else did and to claim authorship ? As far as I know this is
>  > considered as impolite.
>
> I don't see "with proper credit for Bart" as claiming authorship.  And
> yes, I think proposing a better way of doing things is definitely
> regular kernel coding practice.  It's more than polite -- it's going way
> beyond helpful review comments and actually helping revise the patches.

You are welcome to have a look at the descriptions of the three
patches posted by David Dillow. I was not credited in any way in these
descriptions. Not for analyzing the problem, not for the design of a
solution and not for the work I did in implementing a solution. That
is why I wrote that David Dillow was claiming authorship of the work I
did.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]             ` <adak4vuqrcw.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-07  8:03               ` Bart Van Assche
       [not found]                 ` <e2e108261001070003y6881eac7g3e8129e44b78a475-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-07  8:03 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Thu, Jan 7, 2010 at 8:57 AM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > It was only recently that I had the time to analyze this issue in
>  > depth and that I added SRP_CRED_REQ support in SCST (checked in that
>  > support about one week ago). Before SRP_CRED_REQ support was added in
>  > SCST, the SRP initiator could not log the "Unhandled SRP opcode"
>  > message because it did not receive any unhandled SRP opcodes.
>
> So it sounds like this is fixing something that was not the original
> bug?  Because SCST wasn't sending SRP_CRED_REQ when the issue of the
> initiator hanging was originally seen, if I understand correctly.

The patch I posted is really fixing the original bug. The problem was
that neither the SRP target nor the SRP initiator had support for
SRP_CRED_REQ. Support for SRP_CRED_REQ has to be added to both
software components in order to fix this bug.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                 ` <e2e108261001070003y6881eac7g3e8129e44b78a475-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-07 10:01                   ` Bart Van Assche
  2010-01-10  6:41                   ` Roland Dreier
  1 sibling, 0 replies; 35+ messages in thread
From: Bart Van Assche @ 2010-01-07 10:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Thu, Jan 7, 2010 at 9:03 AM, Bart Van Assche
<bart.vanassche-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> On Thu, Jan 7, 2010 at 8:57 AM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>>
>>  > It was only recently that I had the time to analyze this issue in
>>  > depth and that I added SRP_CRED_REQ support in SCST (checked in that
>>  > support about one week ago). Before SRP_CRED_REQ support was added in
>>  > SCST, the SRP initiator could not log the "Unhandled SRP opcode"
>>  > message because it did not receive any unhandled SRP opcodes.
>>
>> So it sounds like this is fixing something that was not the original
>> bug?  Because SCST wasn't sending SRP_CRED_REQ when the issue of the
>> initiator hanging was originally seen, if I understand correctly.
>
> The patch I posted is really fixing the original bug. The problem was
> that neither the SRP target nor the SRP initiator had support for
> SRP_CRED_REQ. Support for SRP_CRED_REQ has to be added to both
> software components in order to fix this bug.

However, I have to admit that the patch description should have
described what happened more in detail. I'll address that in the
second version of my patch.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                 ` <e2e108261001062359l66384219r650294b4d6e9f3f1-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-07 16:26                   ` David Dillow
  0 siblings, 0 replies; 35+ messages in thread
From: David Dillow @ 2010-01-07 16:26 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Thu, 2010-01-07 at 08:59 +0100, Bart Van Assche wrote:
> On Wed, Jan 6, 2010 at 10:40 PM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> >
> >  > Is that regular kernel coding practice, to run away with the work
> >  > someone else did and to claim authorship ? As far as I know this is
> >  > considered as impolite.
> >
> > I don't see "with proper credit for Bart" as claiming authorship.  And
> > yes, I think proposing a better way of doing things is definitely
> > regular kernel coding practice.  It's more than polite -- it's going way
> > beyond helpful review comments and actually helping revise the patches.
> 
> You are welcome to have a look at the descriptions of the three
> patches posted by David Dillow. I was not credited in any way in these
> descriptions. Not for analyzing the problem, not for the design of a
> solution and not for the work I did in implementing a solution. That
> is why I wrote that David Dillow was claiming authorship of the work I
> did.

And here I thought I covered it by stating in the cover letter that they
were not signed off, needed credit for you, and shouldn't go upstream
until you ACK'd them, presumably with that fixed. I thought people would
put two and two together, but I suppose not. I'll try to be more
explicit next time.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                 ` <e2e108261001070003y6881eac7g3e8129e44b78a475-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-07 10:01                   ` Bart Van Assche
@ 2010-01-10  6:41                   ` Roland Dreier
       [not found]                     ` <adavdfapimb.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-10  6:41 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley


 > The patch I posted is really fixing the original bug. The problem was
 > that neither the SRP target nor the SRP initiator had support for
 > SRP_CRED_REQ. Support for SRP_CRED_REQ has to be added to both
 > software components in order to fix this bug.

There's no way for the target to return credits through responses?  I
agree that we should implement the full SRP spec in the initiator but it
seems unfortunate to force both an initiator and target upgrade to fix
what really appears to be a target bug.  This means anyone running a
pre-2.6.34 kernel won't be able to use the SCST SRP target reliably.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                     ` <adavdfapimb.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-10 10:34                       ` Bart Van Assche
       [not found]                         ` <e2e108261001100234k7dcb45cescb9ae133c0cdc256-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-10 10:34 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Sun, Jan 10, 2010 at 7:41 AM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > The patch I posted is really fixing the original bug. The problem was
>  > that neither the SRP target nor the SRP initiator had support for
>  > SRP_CRED_REQ. Support for SRP_CRED_REQ has to be added to both
>  > software components in order to fix this bug.
>
> There's no way for the target to return credits through responses?  I
> agree that we should implement the full SRP spec in the initiator but it
> seems unfortunate to force both an initiator and target upgrade to fix
> what really appears to be a target bug.  This means anyone running a
> pre-2.6.34 kernel won't be able to use the SCST SRP target reliably.

Please let me explain why the SCST SRP target behaves as observed, why
this behavior is not specific to SCST, and which workaround is
available for pre-2.6.34 SRP initiator users.

As known an SRP target passes the so-called req_lim value to the SRP
initiator via the REQUEST LIMIT DELTA field of the SRP_LOGIN_RSP
information unit. Let's call this value RL. As specified in the SRP
r16a document, an initiator may never send more than RL - 2 unanswered
SRP_CMD information units to an SRP target.

When an SRP_CMD request is being processed by an SRP target, the SRP
target can e.g. process this request using one of the following
strategies:
1. Using the buffer in which the SRP_CMD request was received to build
the response. In this case once the response has been built the target
will call ib_post_send() and will wait until the send completion has
been received before it will declare that buffer again available for
receiving by calling ib_post_recv().
2. Using separate sets of buffers for receiving SRP_CMD requests and
sending back SRP_RSP responses. In this case it is possible for the
target to re-enable receiving for the buffer in which the SRP_CMD
request was received before the SRP_RSP response is sent back.

Regarding approach (2): with this approach the value of the REQUEST
LIMIT DELTA field in the SRP_RSP information unit will always equal
one. With this approach it will never be necessary that the targets
sends an SRP_CRED_REQ information unit to the initiator.

Regarding approach (1): since for each SRP_RSP response sent back by
the target ib_post_recv() is called in the target after
ib_post_send(), at least for the first SRP_RSP response the REQUEST
LIMIT DELTA field will be equal to zero. And for a target that is able
to process all received SRP_CMD information units in parallel, it can
happen that the SRP target sends a contiguous series of (RL - 2)
SRP_RSP information units to the initiator with the REQUEST LIMIT
DELTA field equal to zero. As a consequence, the value of the req_lim
variable in the SRP initiator will be equal to 2 and the initiator
won't send any further SRP_CMD requests to the target. A scenario for
how to get the SRP initiator into this state can be found in
http://bugzilla.kernel.org/show_bug.cgi?id=14235.

The only way to get out of this deadlock is that the target send an
SRP_CRED_REQ information unit to the initiator with a non-zero REQUEST
LIMIT DELTA field, and that the SRP initiator processes this
SRP_CRED_REQ information unit.

Because of this possible SRP initiator lockup SCST-SRPT users have
been recommended to disable parallel processing of information units
in this SRP target (by specifying the ib_srpt kernel parameter
thread=1).

My conclusion is that the SRP initiator lockup explained in
http://bugzilla.kernel.org/show_bug.cgi?id=14235 is not specific to
SCST-SRPT but that this lockup can be triggered by any SRP target that
processes SRP_CMD requests in parallel.

So as far as I can see the choices we have are:
* Document that SRP_CRED_REQ support is missing in the Linux SRP
initiator and hence that command processing in SRP targets must be
complicated by making sure that never (RL - 2) contiguous SRP_RSP
information units are sent to the SRP initiator with the REQUEST LIMIT
DELTA field equal to zero.
* Add support for the SRP_CRED_REQ information unit in the Linux SRP initiator.

Note: I do not know of any SRP targets that implement approach (2). As
far as I know all SRP targets use approach (1).

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                         ` <e2e108261001100234k7dcb45cescb9ae133c0cdc256-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-12 17:13                           ` Roland Dreier
       [not found]                             ` <ada3a2bp7ow.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-12 17:13 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley


 > Note: I do not know of any SRP targets that implement approach (2). As
 > far as I know all SRP targets use approach (1).

But no other SRP targets send SRP_CRED_REQ -- and that includes all
native IB storage arrays that I know of.  So there must be some other
way around this (and having separate buffers for command receives and
response sends does seem like a sensible way to design a target anyway).
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                             ` <ada3a2bp7ow.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-12 17:23                               ` Bart Van Assche
       [not found]                                 ` <e2e108261001120923w4b405736v8b8e8ae41322823b-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-12 17:23 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Tue, Jan 12, 2010 at 6:13 PM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > Note: I do not know of any SRP targets that implement approach (2). As
>  > far as I know all SRP targets use approach (1).
>
> But no other SRP targets send SRP_CRED_REQ -- and that includes all
> native IB storage arrays that I know of.  So there must be some other
> way around this (and having separate buffers for command receives and
> response sends does seem like a sensible way to design a target anyway).

Having separate buffer sets for command receives and response sends
might seem a sensible way to design a target. But calling
ib_post_recv() before sending a response back to the initiator is not
acceptable because this increases communication latency. When a target
uses separate buffer sets, sends back SRP_RSP before responses before
ib_post_recv() is called, and when it processes SRP_CMD requests in
parallel, it still can happen that a sequence of RL - 2 SRP_RSP
responses is sent back to the initiator all with the REQUEST LIMIT
DELTA field set to zero. So even with separate buffer sets there is a
need for SRP_CRED_REQ support in the initiator.

I'm surprised by all this opposition against adding SRP_CRED_REQ
support in the initiator, a feature defined in the SRP standard ?

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                 ` <e2e108261001120923w4b405736v8b8e8ae41322823b-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-12 22:57                                   ` Roland Dreier
       [not found]                                     ` <adavdf7nd7k.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Roland Dreier @ 2010-01-12 22:57 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley


 > Having separate buffer sets for command receives and response sends
 > might seem a sensible way to design a target. But calling
 > ib_post_recv() before sending a response back to the initiator is not
 > acceptable because this increases communication latency. When a target
 > uses separate buffer sets, sends back SRP_RSP before responses before
 > ib_post_recv() is called, and when it processes SRP_CMD requests in
 > parallel, it still can happen that a sequence of RL - 2 SRP_RSP
 > responses is sent back to the initiator all with the REQUEST LIMIT
 > DELTA field set to zero. So even with separate buffer sets there is a
 > need for SRP_CRED_REQ support in the initiator.

I doubt you could benchmark the overhead of calling ib_post_recv() in
the full SRP protocol.  Really, I bet it's less than 100 nanoseconds to
form the work request and call ib_post_recv().  Maybe I'm wrong but I
really expect the overhead of actually doing IO (even to a ramdisk
buffer or something) is going to be much higher than posting a receive.

Or maybe you did benchmark it and I'm wrong?

 > I'm surprised by all this opposition against adding SRP_CRED_REQ
 > support in the initiator, a feature defined in the SRP standard ?

As I said I agree we should implement support for SRP_CRED_REQ.  However
I think it would be better if the SCST target could be made to work
reliably with pre-2.6.34 initiators as well.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                     ` <adavdf7nd7k.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-12 23:24                                       ` Jason Gunthorpe
       [not found]                                         ` <20100112232421.GA16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2010-01-13  7:29                                       ` Bart Van Assche
  1 sibling, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2010-01-12 23:24 UTC (permalink / raw)
  To: Roland Dreier; +Cc: Bart Van Assche, OFED mailing list, Chris Worley

On Tue, Jan 12, 2010 at 02:57:35PM -0800, Roland Dreier wrote:

> I doubt you could benchmark the overhead of calling ib_post_recv() in
> the full SRP protocol.  Really, I bet it's less than 100 nanoseconds to
> form the work request and call ib_post_recv().  Maybe I'm wrong but I
> really expect the overhead of actually doing IO (even to a ramdisk
> buffer or something) is going to be much higher than posting a receive.
> 
> Or maybe you did benchmark it and I'm wrong?

I'd actually think that the decline caused by exhausting credit on
the initiator side is far worse than ib_post_recv.. If ib_post_recv
really is noticable then maybe just force loading the RQ after XX
unposted are outstanding.

I also was wondering if the SRP spec had anything to say about this,
creating this kind of RQ->SQ dependency on either side should only be
done if the protocol explicitly allows it for one side only -
otherwise you risk deadlocking when used with an initiator that felt
it too could create that kind of dependency.

Also, I couldn't tell for sure from a cursory examination of the
patch, but the initiator must be designed to not stall processing the
RQ dependent on the SQ or the credit level, when using a credit scheme
like this. Or you will deadlock.

For instance it isn't clear to me how the control flow around
srp_process_cred_req is ment to work - it tries to send a reply, but
if it can't due to srp_get_txp_iu failing it just gives up forever?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                         ` <20100112232421.GA16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-01-13  7:23                                           ` Bart Van Assche
       [not found]                                             ` <e2e108261001122323p695d8a9cscd992eda25fdba89-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-01-13  9:32                                           ` Bart Van Assche
  1 sibling, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-13  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Wed, Jan 13, 2010 at 12:24 AM, Jason Gunthorpe
<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> [ ... ]
>
> Also, I couldn't tell for sure from a cursory examination of the
> patch, but the initiator must be designed to not stall processing the
> RQ dependent on the SQ or the credit level, when using a credit scheme
> like this. Or you will deadlock.
>
> For instance it isn't clear to me how the control flow around
> srp_process_cred_req is ment to work - it tries to send a reply, but
> if it can't due to srp_get_txp_iu failing it just gives up forever?

For a standards-conforming SRP target, srp_get_txp_iu() will never
fail. A quote from section 5.5.2 of the SRP r16a document:

SRP target ports shall limit themselves to one outstanding SRP request
(see table 7) per RDMA channel. Upon
sending an SRP request, an SRP target port shall not send another SRP
request on the same RDMA channel
until after it receives the SRP response (see table 8) for the
previous SRP request.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                     ` <adavdf7nd7k.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  2010-01-12 23:24                                       ` Jason Gunthorpe
@ 2010-01-13  7:29                                       ` Bart Van Assche
       [not found]                                         ` <e2e108261001122329u698f13ecibf42c1dc60b5cc04-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-13  7:29 UTC (permalink / raw)
  To: Roland Dreier; +Cc: OFED mailing list, Chris Worley

On Tue, Jan 12, 2010 at 11:57 PM, Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>
>  > Having separate buffer sets for command receives and response sends
>  > might seem a sensible way to design a target. But calling
>  > ib_post_recv() before sending a response back to the initiator is not
>  > acceptable because this increases communication latency. When a target
>  > uses separate buffer sets, sends back SRP_RSP before responses before
>  > ib_post_recv() is called, and when it processes SRP_CMD requests in
>  > parallel, it still can happen that a sequence of RL - 2 SRP_RSP
>  > responses is sent back to the initiator all with the REQUEST LIMIT
>  > DELTA field set to zero. So even with separate buffer sets there is a
>  > need for SRP_CRED_REQ support in the initiator.
>
> I doubt you could benchmark the overhead of calling ib_post_recv() in
> the full SRP protocol.  Really, I bet it's less than 100 nanoseconds to
> form the work request and call ib_post_recv().  Maybe I'm wrong but I
> really expect the overhead of actually doing IO (even to a ramdisk
> buffer or something) is going to be much higher than posting a receive.
>
> Or maybe you did benchmark it and I'm wrong?

The impact of the ib_post_recv() call is small but measurable. I'll
publish measurement results soon.

>  > I'm surprised by all this opposition against adding SRP_CRED_REQ
>  > support in the initiator, a feature defined in the SRP standard ?
>
> As I said I agree we should implement support for SRP_CRED_REQ.  However
> I think it would be better if the SCST target could be made to work
> reliably with pre-2.6.34 initiators as well.

OK, I will first evaluate which alternatives there are for SCST-SRPT
such that it does not rely on initiator-side support for SRP_CRED_REQ.
This will not only help with pre-2.6.34 SRP initiators but also with
other SRP initiators than the one available in the vanilla Linux
kernel (e.g. OFED or WinOF).

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                         ` <20100112232421.GA16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2010-01-13  7:23                                           ` Bart Van Assche
@ 2010-01-13  9:32                                           ` Bart Van Assche
  1 sibling, 0 replies; 35+ messages in thread
From: Bart Van Assche @ 2010-01-13  9:32 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Wed, Jan 13, 2010 at 12:24 AM, Jason Gunthorpe
<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> I'd actually think that the decline caused by exhausting credit on
> the initiator side is far worse than ib_post_recv.. If ib_post_recv
> really is noticable then maybe just force loading the RQ after XX
> unposted are outstanding.

The performance penalty of exhausting credit on the initiator side is
indeed far worse than that of a single ib_post_recv() call at the
target side. But in a realistic setup such credit exhaustion happens
at a rate of about once an hour, while ib_post_recv() is called once
for every request. It is easy to set up an SRP I/O test where
ib_post_recv() is called more than 250.000 times per second.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                         ` <e2e108261001122329u698f13ecibf42c1dc60b5cc04-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-13 16:58                                           ` Roland Dreier
  0 siblings, 0 replies; 35+ messages in thread
From: Roland Dreier @ 2010-01-13 16:58 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: OFED mailing list, Chris Worley


 > The impact of the ib_post_recv() call is small but measurable. I'll
 > publish measurement results soon.

Interesting.  If there is overhead you could amortize it by keeping some
number of spare buffers posting, and batching your calls to
ib_post_recv() (eg post a list of 8 buffers every 8 commands, and keep 7
spare buffers posted).

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                             ` <e2e108261001122323p695d8a9cscd992eda25fdba89-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-13 18:16                                               ` Jason Gunthorpe
       [not found]                                                 ` <20100113181615.GC16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2010-01-13 18:16 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Wed, Jan 13, 2010 at 08:23:27AM +0100, Bart Van Assche wrote:
> On Wed, Jan 13, 2010 at 12:24 AM, Jason Gunthorpe
> <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> > [ ... ]
> >
> > Also, I couldn't tell for sure from a cursory examination of the
> > patch, but the initiator must be designed to not stall processing the
> > RQ dependent on the SQ or the credit level, when using a credit scheme
> > like this. Or you will deadlock.
> >
> > For instance it isn't clear to me how the control flow around
> > srp_process_cred_req is ment to work - it tries to send a reply, but
> > if it can't due to srp_get_txp_iu failing it just gives up forever?
> 
> For a standards-conforming SRP target, srp_get_txp_iu() will never
> fail. A quote from section 5.5.2 of the SRP r16a document:

What guarantees that that the send completion for the reply is
processed before a receive completion for the next request?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                                 ` <20100113181615.GC16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-01-13 18:57                                                   ` Bart Van Assche
       [not found]                                                     ` <e2e108261001131057g339fc80fn638109f2d441f13a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Bart Van Assche @ 2010-01-13 18:57 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Wed, Jan 13, 2010 at 7:16 PM, Jason Gunthorpe
<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
>
> On Wed, Jan 13, 2010 at 08:23:27AM +0100, Bart Van Assche wrote:
> > On Wed, Jan 13, 2010 at 12:24 AM, Jason Gunthorpe
> > <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> > > [ ... ]
> > >
> > > Also, I couldn't tell for sure from a cursory examination of the
> > > patch, but the initiator must be designed to not stall processing the
> > > RQ dependent on the SQ or the credit level, when using a credit scheme
> > > like this. Or you will deadlock.
> > >
> > > For instance it isn't clear to me how the control flow around
> > > srp_process_cred_req is ment to work - it tries to send a reply, but
> > > if it can't due to srp_get_txp_iu failing it just gives up forever?
> >
> > For a standards-conforming SRP target, srp_get_txp_iu() will never
> > fail. A quote from section 5.5.2 of the SRP r16a document:
>
> What guarantees that that the send completion for the reply is
> processed before a receive completion for the next request?

Did you notice that I increased the number of receive slots reserved
for target-initiated requests from one to two ?

+       /* Number of receive slots reserved for receiving requests. */
+       SRP_RXR_SIZE            = 2,

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] IB/srp: Fix initiator lockup
       [not found]                                                     ` <e2e108261001131057g339fc80fn638109f2d441f13a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-01-13 19:52                                                       ` Jason Gunthorpe
  0 siblings, 0 replies; 35+ messages in thread
From: Jason Gunthorpe @ 2010-01-13 19:52 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: Roland Dreier, OFED mailing list, Chris Worley

On Wed, Jan 13, 2010 at 07:57:26PM +0100, Bart Van Assche wrote:
> On Wed, Jan 13, 2010 at 7:16 PM, Jason Gunthorpe
> <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> >
> > On Wed, Jan 13, 2010 at 08:23:27AM +0100, Bart Van Assche wrote:
> > > On Wed, Jan 13, 2010 at 12:24 AM, Jason Gunthorpe
> > > <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> > > > [ ... ]
> > > >
> > > > Also, I couldn't tell for sure from a cursory examination of the
> > > > patch, but the initiator must be designed to not stall processing the
> > > > RQ dependent on the SQ or the credit level, when using a credit scheme
> > > > like this. Or you will deadlock.
> > > >
> > > > For instance it isn't clear to me how the control flow around
> > > > srp_process_cred_req is ment to work - it tries to send a reply, but
> > > > if it can't due to srp_get_txp_iu failing it just gives up forever?
> > >
> > > For a standards-conforming SRP target, srp_get_txp_iu() will never
> > > fail. A quote from section 5.5.2 of the SRP r16a document:
> >
> > What guarantees that that the send completion for the reply is
> > processed before a receive completion for the next request?
> 
> Did you notice that I increased the number of receive slots reserved
> for target-initiated requests from one to two ?

Yes, but I don't see how more receive slots helps exhausting send
slots.

The IBA doesn't guarentee ordering between completions originating
from different work queues. Getting a receive completion does not
guarantee that a related send completion has been received. Even if
the work queues are mapped to the same CQ.

Since srp_get_txp_iu depends on txp_head/tail which is incremented by
send completions, but srp_get_txp_iu is called on the receive
completion path, it is possible to call srp_get_txp_iu multiple times
before the send completions are generated - even though the target is
only generating new request sends in response to reply receives.

The two main causes are ack coalescing algorithms and lost ack
packets.

That said, with SRP_TXP_SIZE set to 2, I think it would be really hard
to break this in real life, with current hardware.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2010-01-13 19:52 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-02 12:19 [PATCH] IB/srp: Fix initiator lockup Bart Van Assche
     [not found] ` <e2e108261001020419l36319156hb9d625edc2e15d06-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-02 16:05   ` Fwd: " Chris Worley
     [not found]     ` <f3177b9e1001020805k4dce1991u3733c5a7f6d46aaa-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-02 17:52       ` Bart Van Assche
2010-01-04  1:34   ` David Dillow
     [not found]     ` <1262568846.13289.4.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2010-01-04  1:43       ` [RFC PATCH 1/3] IB/srp: differentiate the uses of SRP_SQ_SIZE David Dillow
2010-01-04  1:43       ` [RFC PATCH 2/3] IB/srp: minimal support for SRP_CRED_REQ and SRP_AER_REQ David Dillow
     [not found]         ` <1262569417-20341-2-git-send-email-dave-i1Mk8JYDVaaSihdK6806/g@public.gmane.org>
2010-01-06 21:48           ` Roland Dreier
     [not found]             ` <adar5q2sy5q.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-06 23:02               ` David Dillow
     [not found]                 ` <1262818979.2265.13.camel-FqX9LgGZnHWDB2HL1qBt2PIbXMQ5te18@public.gmane.org>
2010-01-06 23:20                   ` Roland Dreier
2010-01-04  1:43       ` [RFC PATCH 3/3] IB/srp: export req_limit via sysfs David Dillow
2010-01-04  7:13       ` [PATCH] IB/srp: Fix initiator lockup Bart Van Assche
     [not found]         ` <e2e108261001032313v472d4b9dm75c3d0b1a9268adc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-04  8:12           ` David Dillow
     [not found]             ` <1262592761.13289.13.camel-1q1vX8mYZiGLUyTwlgNVppKKF0rrzTr+@public.gmane.org>
2010-01-04  8:32               ` Bart Van Assche
2010-01-06 21:40           ` Roland Dreier
     [not found]             ` <adavdfesyi4.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-07  7:59               ` Bart Van Assche
     [not found]                 ` <e2e108261001062359l66384219r650294b4d6e9f3f1-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-07 16:26                   ` David Dillow
2010-01-06 21:38       ` Roland Dreier
2010-01-06 21:37   ` Roland Dreier
     [not found]     ` <ada4omyud7c.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-07  7:52       ` Bart Van Assche
     [not found]         ` <e2e108261001062352l1925f4e1i24bdbdde08056c4c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-07  7:57           ` Roland Dreier
     [not found]             ` <adak4vuqrcw.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-07  8:03               ` Bart Van Assche
     [not found]                 ` <e2e108261001070003y6881eac7g3e8129e44b78a475-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-07 10:01                   ` Bart Van Assche
2010-01-10  6:41                   ` Roland Dreier
     [not found]                     ` <adavdfapimb.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-10 10:34                       ` Bart Van Assche
     [not found]                         ` <e2e108261001100234k7dcb45cescb9ae133c0cdc256-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-12 17:13                           ` Roland Dreier
     [not found]                             ` <ada3a2bp7ow.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-12 17:23                               ` Bart Van Assche
     [not found]                                 ` <e2e108261001120923w4b405736v8b8e8ae41322823b-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-12 22:57                                   ` Roland Dreier
     [not found]                                     ` <adavdf7nd7k.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-12 23:24                                       ` Jason Gunthorpe
     [not found]                                         ` <20100112232421.GA16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-01-13  7:23                                           ` Bart Van Assche
     [not found]                                             ` <e2e108261001122323p695d8a9cscd992eda25fdba89-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-13 18:16                                               ` Jason Gunthorpe
     [not found]                                                 ` <20100113181615.GC16490-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-01-13 18:57                                                   ` Bart Van Assche
     [not found]                                                     ` <e2e108261001131057g339fc80fn638109f2d441f13a-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-13 19:52                                                       ` Jason Gunthorpe
2010-01-13  9:32                                           ` Bart Van Assche
2010-01-13  7:29                                       ` Bart Van Assche
     [not found]                                         ` <e2e108261001122329u698f13ecibf42c1dc60b5cc04-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-13 16:58                                           ` Roland Dreier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox