All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout.
@ 2010-10-26 18:33 Mike Heinz
       [not found] ` <4C2744E8AD2982428C5BFE523DF8CDCB49D4675DEB-amwN6d8PyQWXx9kJd3VG2h2eb7JE58TQ@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Heinz @ 2010-10-26 18:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

[-- Attachment #1: Type: text/plain, Size: 6784 bytes --]

Resending. Didn't get any reply after the last posting.

-----Original Message-----
From: Mike Heinz 
Sent: Monday, October 11, 2010 11:34 AM
To: 'linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org'
Subject: [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout.

This patch builds upon a discussion we had earlier this year on adding a backoff function when retrying MAD sends after a timeout.

This patch does NOT implement the ABI/API changes that would be needed to take advantage of the new features, but it lays the groundwork for doing so. In addition, it provides a new module parameter that allows the administrator to coerce existing code into using the new capability.

First, I've added a new field called "randomized_wait" to the ib_mad_send_buf structure. If this parameter is set, each time the WR times out, the timeout for the next retry is set to (send_wr->timeout_ms + (511 << send_wr->retries) - (random32() & 511)). In other words, on the first retry, the randomization code will add between 0 and 1/2 second to the timeout. On the second retry, it will add between 1 and 1.5 seconds to the timeout, on the 3rd, between 2 and 2.5 seconds, on the 4th, between 4 and 4.5, et cetera. In addition, a new field, total_timeout, has been added to the ib_mad_send_wr_private structure and is initialized to (send_wr->timeout * send_wr->max_retries). Retries cannot exceed this total time, even though that will mean a lower number of retry attempts.

Finally, I've added a module parameter to coerce all mad work requests to use this feature if desired.

parm:           randomized_wait:When true, use a randomized backoff algorithm to control retries for timeouts. (int)

--------

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@
 #include "smi.h"
 #include "agent.h"
 
+#include "linux/random.h"
+
+#define MAD_MIN_TIMEOUT_MS 511
+#define MAD_RAND_TIMEOUT_MS 511
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("kernel IB MAD API");  MODULE_AUTHOR("Hal Rosenstock"); @@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests  module_param_named(recv_queue_size, mad_recvq_size, int, 0444);  MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+int mad_randomized_wait = 0;
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444); 
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff 
+algorithm to control retries for timeouts.");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list; @@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		}
 
 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+		mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait;
+		mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * 
+send_buf->retries;
+		
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
+		
 		send_buf->retries = 0;
+		
 		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 			deref_mad_agent(mad_agent_priv);
 			return;
 		}
+
 		ib_mark_mad_done(mad_send_wr);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
@@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)  {
 	int ret;
 
-	if (!mad_send_wr->retries_left)
+	if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
 		return -ETIMEDOUT;
 
 	mad_send_wr->retries_left--;
 	mad_send_wr->send_buf.retries++;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	if (mad_send_wr->randomized_wait) {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+			(MAD_MIN_TIMEOUT_MS<<mad_send_wr->send_buf.retries) -
+			(random32()&MAD_RAND_TIMEOUT_MS));
+		if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+			mad_send_wr->timeout = mad_send_wr->total_timeout;
+			mad_send_wr->total_timeout = 0;
+		} else {
+			mad_send_wr->total_timeout -= mad_send_wr->timeout;
+		}
+	} else {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	}
 
+	printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u, timeout: %lu, total_timeout: %lu\n",
+		mad_send_wr,
+		mad_send_wr->send_buf.retries,
+		mad_send_wr->retries_left,
+		mad_send_wr->timeout,
+		mad_send_wr->total_timeout);
+		
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@ struct ib_mad_send_wr_private {
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
+	unsigned long total_timeout;
 	int max_retries;
 	int retries_left;
+	int randomized_wait;
 	int retry;
 	int refcount;
 	enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
 
 #define IB_MGMT_MAX_METHODS			128
 
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS						0x0000
+#define IB_MGMT_MAD_STATUS_BUSY							0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD				0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERERSION				0x0004	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD			0x0008	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE			0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION			1
 
@@ -246,6 +255,7 @@ struct ib_mad_send_buf {
 	int			seg_count;
 	int			seg_size;
 	int			timeout_ms;
+	int			randomized_wait;
 	int			retries;
 };


[-- Attachment #2: randomized_mad_timeout.patch --]
[-- Type: application/octet-stream, Size: 4914 bytes --]

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@
 #include "smi.h"
 #include "agent.h"
 
+#include "linux/random.h"
+
+#define MAD_MIN_TIMEOUT_MS 511
+#define MAD_RAND_TIMEOUT_MS 511
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
@@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+int mad_randomized_wait = 0;
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444);
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff algorithm to control retries for timeouts.");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
@@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		}
 
 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+		mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait;
+		mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries;
+		
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
+		
 		send_buf->retries = 0;
+		
 		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 			deref_mad_agent(mad_agent_priv);
 			return;
 		}
+
 		ib_mark_mad_done(mad_send_wr);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
@@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
 
-	if (!mad_send_wr->retries_left)
+	if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
 		return -ETIMEDOUT;
 
 	mad_send_wr->retries_left--;
 	mad_send_wr->send_buf.retries++;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	if (mad_send_wr->randomized_wait) {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+			(MAD_MIN_TIMEOUT_MS<<mad_send_wr->send_buf.retries) -
+			(random32()&MAD_RAND_TIMEOUT_MS));
+		if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+			mad_send_wr->timeout = mad_send_wr->total_timeout;
+			mad_send_wr->total_timeout = 0;
+		} else {
+			mad_send_wr->total_timeout -= mad_send_wr->timeout;
+		}
+	} else {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	}
 
+	printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u, timeout: %lu, total_timeout: %lu\n",
+		mad_send_wr,
+		mad_send_wr->send_buf.retries,
+		mad_send_wr->retries_left,
+		mad_send_wr->timeout,
+		mad_send_wr->total_timeout);
+		
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@ struct ib_mad_send_wr_private {
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
+	unsigned long total_timeout;
 	int max_retries;
 	int retries_left;
+	int randomized_wait;
 	int retry;
 	int refcount;
 	enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
 
 #define IB_MGMT_MAX_METHODS			128
 
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS						0x0000
+#define IB_MGMT_MAD_STATUS_BUSY							0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD				0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERERSION				0x0004	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD			0x0008	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE			0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION			1
 
@@ -246,6 +255,7 @@ struct ib_mad_send_buf {
 	int			seg_count;
 	int			seg_size;
 	int			timeout_ms;
+	int			randomized_wait;
 	int			retries;
 };
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread
* [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout.
@ 2010-10-11 15:34 Mike Heinz
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Heinz @ 2010-10-11 15:34 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

[-- Attachment #1: Type: text/plain, Size: 6484 bytes --]

This patch builds upon a discussion we had earlier this year on adding a backoff function when retrying MAD sends after a timeout.

This patch does NOT implement the ABI/API changes that would be needed to take advantage of the new features, but it lays the groundwork for doing so. In addition, it provides a new module parameter that allows the administrator to coerce existing code into using the new capability.

First, I've added a new field called "randomized_wait" to the ib_mad_send_buf structure. If this parameter is set, each time the WR times out, the timeout for the next retry is set to (send_wr->timeout_ms + (511 << send_wr->retries) - (random32() & 511)). In other words, on the first retry, the randomization code will add between 0 and 1/2 second to the timeout. On the second retry, it will add between 1 and 1.5 seconds to the timeout, on the 3rd, between 2 and 2.5 seconds, on the 4th, between 4 and 4.5, et cetera. In addition, a new field, total_timeout, has been added to the ib_mad_send_wr_private structure and is initialized to (send_wr->timeout * send_wr->max_retries). Retries cannot exceed this total time, even though that will mean a lower number of retry attempts.

Finally, I've added a module parameter to coerce all mad work requests to use this feature if desired.

parm:           randomized_wait:When true, use a randomized backoff algorithm to control retries for timeouts. (int)

--------

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@
 #include "smi.h"
 #include "agent.h"
 
+#include "linux/random.h"
+
+#define MAD_MIN_TIMEOUT_MS 511
+#define MAD_RAND_TIMEOUT_MS 511
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
@@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+int mad_randomized_wait = 0;
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444);
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff algorithm to control retries for timeouts.");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
@@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		}
 
 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+		mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait;
+		mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries;
+		
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
+		
 		send_buf->retries = 0;
+		
 		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 			deref_mad_agent(mad_agent_priv);
 			return;
 		}
+
 		ib_mark_mad_done(mad_send_wr);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
@@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
 
-	if (!mad_send_wr->retries_left)
+	if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
 		return -ETIMEDOUT;
 
 	mad_send_wr->retries_left--;
 	mad_send_wr->send_buf.retries++;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	if (mad_send_wr->randomized_wait) {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+			(MAD_MIN_TIMEOUT_MS<<mad_send_wr->send_buf.retries) -
+			(random32()&MAD_RAND_TIMEOUT_MS));
+		if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+			mad_send_wr->timeout = mad_send_wr->total_timeout;
+			mad_send_wr->total_timeout = 0;
+		} else {
+			mad_send_wr->total_timeout -= mad_send_wr->timeout;
+		}
+	} else {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	}
 
+	printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u, timeout: %lu, total_timeout: %lu\n",
+		mad_send_wr,
+		mad_send_wr->send_buf.retries,
+		mad_send_wr->retries_left,
+		mad_send_wr->timeout,
+		mad_send_wr->total_timeout);
+		
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@ struct ib_mad_send_wr_private {
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
+	unsigned long total_timeout;
 	int max_retries;
 	int retries_left;
+	int randomized_wait;
 	int retry;
 	int refcount;
 	enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
 
 #define IB_MGMT_MAX_METHODS			128
 
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS						0x0000
+#define IB_MGMT_MAD_STATUS_BUSY							0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD				0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERERSION				0x0004	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD			0x0008	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE			0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION			1
 
@@ -246,6 +255,7 @@ struct ib_mad_send_buf {
 	int			seg_count;
 	int			seg_size;
 	int			timeout_ms;
+	int			randomized_wait;
 	int			retries;
 };


[-- Attachment #2: randomized_mad_timeout.patch --]
[-- Type: application/octet-stream, Size: 4914 bytes --]

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@
 #include "smi.h"
 #include "agent.h"
 
+#include "linux/random.h"
+
+#define MAD_MIN_TIMEOUT_MS 511
+#define MAD_RAND_TIMEOUT_MS 511
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
@@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+int mad_randomized_wait = 0;
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444);
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff algorithm to control retries for timeouts.");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
@@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		}
 
 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+		mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait;
+		mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries;
+		
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
+		
 		send_buf->retries = 0;
+		
 		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 			deref_mad_agent(mad_agent_priv);
 			return;
 		}
+
 		ib_mark_mad_done(mad_send_wr);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
@@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
 
-	if (!mad_send_wr->retries_left)
+	if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
 		return -ETIMEDOUT;
 
 	mad_send_wr->retries_left--;
 	mad_send_wr->send_buf.retries++;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	if (mad_send_wr->randomized_wait) {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+			(MAD_MIN_TIMEOUT_MS<<mad_send_wr->send_buf.retries) -
+			(random32()&MAD_RAND_TIMEOUT_MS));
+		if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+			mad_send_wr->timeout = mad_send_wr->total_timeout;
+			mad_send_wr->total_timeout = 0;
+		} else {
+			mad_send_wr->total_timeout -= mad_send_wr->timeout;
+		}
+	} else {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	}
 
+	printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u, timeout: %lu, total_timeout: %lu\n",
+		mad_send_wr,
+		mad_send_wr->send_buf.retries,
+		mad_send_wr->retries_left,
+		mad_send_wr->timeout,
+		mad_send_wr->total_timeout);
+		
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@ struct ib_mad_send_wr_private {
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
+	unsigned long total_timeout;
 	int max_retries;
 	int retries_left;
+	int randomized_wait;
 	int retry;
 	int refcount;
 	enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
 
 #define IB_MGMT_MAX_METHODS			128
 
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS						0x0000
+#define IB_MGMT_MAD_STATUS_BUSY							0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD				0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERERSION				0x0004	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD			0x0008	
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE			0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION			1
 
@@ -246,6 +255,7 @@ struct ib_mad_send_buf {
 	int			seg_count;
 	int			seg_size;
 	int			timeout_ms;
+	int			randomized_wait;
 	int			retries;
 };
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-11-03 17:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-26 18:33 [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout Mike Heinz
     [not found] ` <4C2744E8AD2982428C5BFE523DF8CDCB49D4675DEB-amwN6d8PyQWXx9kJd3VG2h2eb7JE58TQ@public.gmane.org>
2010-10-28 13:12   ` Hal Rosenstock
     [not found]     ` <4CC976CC.1080009-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2010-11-02 21:58       ` [PATCH v2] " Mike Heinz
     [not found]         ` <4C2744E8AD2982428C5BFE523DF8CDCB4A207A80C5-amwN6d8PyQWXx9kJd3VG2h2eb7JE58TQ@public.gmane.org>
2010-11-02 23:12           ` Hefty, Sean
     [not found]             ` <CF9C39F99A89134C9CF9C4CCB68B8DDF25B83084D5-osO9UTpF0USkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2010-11-03 15:36               ` Mike Heinz
     [not found]                 ` <4C2744E8AD2982428C5BFE523DF8CDCB4A207A813F-amwN6d8PyQWXx9kJd3VG2h2eb7JE58TQ@public.gmane.org>
2010-11-03 16:03                   ` Hefty, Sean
     [not found]                     ` <CF9C39F99A89134C9CF9C4CCB68B8DDF25B8308894-osO9UTpF0USkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2010-11-03 17:22                       ` Mike Heinz
  -- strict thread matches above, loose matches on Subject: below --
2010-10-11 15:34 [PATCH] " Mike Heinz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.