public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ipoib: clean ib tx ring periodically
@ 2017-02-16 15:35 Paolo Abeni
       [not found] ` <589591340739f0ceeea9ca449b6de3df01caadc4.1487259121.git.pabeni-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Abeni @ 2017-02-16 15:35 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Doug Ledford, Sean Hefty, Hal Rosenstock

The skbs transmitted via ipoib_send() are freed only if there are
16 or more outstanding work requests or if the send queue is full.

If there is very little networking activity, the transmitted skbs
can be held by the device driver for an unlimited amount of time,
starving other subsystems.

E.g. assuming the ipv6 is enabled, with the following sequence:

systemctl start firewalld
modprobe ib_ipoib
ip addr add dev ib0 fc00::1/64
systemctl stop firewalld

a cpu will hang: rmmod conntrack will keep a core busy
spinning for nf_conntrack_untracked going to 0, since some ICMP6
ND packets are generated and transmitted when the ipv6 address
is attached to the device, and such packets get a notrack ct
entry.

This change address the issue introducing a periodic timer performing
"garbage collection" on the send ring at low frequency (once every
second).

This new timer runs independently from the currently used poll_timer,
so that no additional delay is introduced to clean the ring after
errors or ring full event.

Reported-by: Thomas Cameron <tcameron-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Fixes: f56bcd801356 ("IPoIB: Use separate CQ for UD send completions")
Signed-off-by: Paolo Abeni <pabeni-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h    |  2 ++
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 35 +++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717..3b5039b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -402,6 +402,8 @@ struct ipoib_dev_priv {
 	u64	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
+	struct timer_list tx_gc_timer;
+	unsigned long drain_tx_cq_stamp;
 	unsigned max_send_sge;
 	bool sm_fullmember_sendonly_support;
 };
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5038f9d..e565848 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -490,13 +490,20 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 	napi_schedule(&priv->napi);
 }
 
+static void __drain_tx_cq(struct ipoib_dev_priv *priv)
+{
+	while (poll_tx(priv))
+		; /* nothing */
+
+	priv->drain_tx_cq_stamp = jiffies;
+}
+
 static void drain_tx_cq(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	netif_tx_lock(dev);
-	while (poll_tx(priv))
-		; /* nothing */
+	__drain_tx_cq(priv);
 
 	if (netif_queue_stopped(dev))
 		mod_timer(&priv->poll_timer, jiffies + 1);
@@ -637,8 +644,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
-		while (poll_tx(priv))
-			; /* nothing */
+		__drain_tx_cq(priv);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
@@ -697,6 +703,19 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
 	drain_tx_cq((struct net_device *)ctx);
 }
 
+static void ipoib_tx_gc_timer_func(unsigned long ctx)
+{
+	struct net_device *dev = (struct net_device *)ctx;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	netif_tx_lock(dev);
+	if (time_after(jiffies, priv->drain_tx_cq_stamp + HZ))
+		__drain_tx_cq(priv);
+	netif_tx_unlock(dev);
+
+	mod_timer(&priv->tx_gc_timer, jiffies + HZ);
+}
+
 int ipoib_ib_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -834,8 +853,7 @@ void ipoib_drain_cq(struct net_device *dev)
 		}
 	} while (n == IPOIB_NUM_WC);
 
-	while (poll_tx(priv))
-		; /* nothing */
+	__drain_tx_cq(priv);
 
 	local_bh_enable();
 }
@@ -906,6 +924,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
 
 timeout:
 	del_timer_sync(&priv->poll_timer);
+	del_timer_sync(&priv->tx_gc_timer);
 	qp_attr.qp_state = IB_QPS_RESET;
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
@@ -932,6 +951,10 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 
 	setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
 		    (unsigned long) dev);
+	setup_timer(&priv->tx_gc_timer, ipoib_tx_gc_timer_func,
+		    (unsigned long) dev);
+	mod_timer(&priv->tx_gc_timer, jiffies + HZ);
+	priv->drain_tx_cq_stamp = jiffies;
 
 	if (dev->flags & IFF_UP) {
 		if (ipoib_ib_dev_open(dev)) {
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-03-01  9:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-02-16 15:35 [PATCH] ipoib: clean ib tx ring periodically Paolo Abeni
     [not found] ` <589591340739f0ceeea9ca449b6de3df01caadc4.1487259121.git.pabeni-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-16 17:09   ` Leon Romanovsky
     [not found]     ` <20170216170946.GZ6989-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-02-16 17:47       ` Paolo Abeni
2017-03-01  7:28   ` Erez Shitrit
     [not found]     ` <CAAk-MO_J2JL9YvNEV24LJik8qGKrBHBW+2NtzAOL+QEve1CMJg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-01  9:07       ` Paolo Abeni
     [not found]         ` <1488359256.2607.2.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-03-01  9:39           ` Erez Shitrit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox