linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6
@ 2012-08-29 15:14 Or Gerlitz
       [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Or Gerlitz @ 2012-08-29 15:14 UTC (permalink / raw)
  To: roland-DgEjT+Ai2ygdnm+yROfE0A
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Or Gerlitz

Hi Roland,

This short series include two more fixes from Shlomo to the newly
introduced IPoIB neighbour table, and a fix for Yishai that completes
the support for memory registration of up to 8TB.

Or.

Shlomo Pongratz (2):
  IB/ipoib: Fix memory leak in the neigh table deletion flow
  IB/ipoib: Fix AB-BA deadlock when deleting neighbours

Yishai Hadas (1):
  net/mlx4_core: Enable 8TB memory registration

 drivers/infiniband/ulp/ipoib/ipoib.h           |    4 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |  101 +++++++++++------------
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |    2 -
 drivers/net/ethernet/mellanox/mlx4/icm.c       |   30 ++++---
 drivers/net/ethernet/mellanox/mlx4/icm.h       |   10 +-
 5 files changed, 74 insertions(+), 73 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH fixes/for-3.6 1/3] IB/ipoib: Fix memory leak in the neigh table deletion flow
       [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2012-08-29 15:14   ` Or Gerlitz
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours Or Gerlitz
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Or Gerlitz @ 2012-08-29 15:14 UTC (permalink / raw)
  To: roland-DgEjT+Ai2ygdnm+yROfE0A
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Shlomo Pongratz, Or Gerlitz

From: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

If the neighbours hash table is empty when unloading the module,
then ipoib_flush_neighs, the cleanup routine wasn't called, and
the memory used for the hash table itself leaked.

To fix this, ipoib_flush_neighs is allways called, and another
completion object was added to signal when the table is freed.

Once inovked, ipoib_flush_neighs flushes all the neighbours
(if there are any), calls the the hash table RCU free routine,
(which now signals completion of the deletion process) and
waits for the last neighbour to be freed.

Signed-off-by: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |    3 +++
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   23 +++++++++++++++++------
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca43901..36b5940 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -262,7 +262,9 @@ struct ipoib_ethtool_st {
 	u16     max_coalesced_frames;
 };
 
+struct ipoib_neigh_table;
 struct ipoib_neigh_hash {
+	struct ipoib_neigh_table       *ntbl;
 	struct ipoib_neigh __rcu      **buckets;
 	struct rcu_head			rcu;
 	u32				mask;
@@ -274,6 +276,7 @@ struct ipoib_neigh_table {
 	rwlock_t			rwlock;
 	atomic_t			entries;
 	struct completion		flushed;
+	struct completion		deleted;
 };
 
 /*
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3e2085a..72c1fc2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1095,6 +1095,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 	htbl->mask = (size - 1);
 	htbl->buckets = buckets;
 	ntbl->htbl = htbl;
+	htbl->ntbl = ntbl;
 	atomic_set(&ntbl->entries, 0);
 
 	/* start garbage collection */
@@ -1111,9 +1112,11 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 						    struct ipoib_neigh_hash,
 						    rcu);
 	struct ipoib_neigh __rcu **buckets = htbl->buckets;
+	struct ipoib_neigh_table *ntbl = htbl->ntbl;
 
 	kfree(buckets);
 	kfree(htbl);
+	complete(&ntbl->deleted);
 }
 
 void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
@@ -1164,7 +1167,9 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 	struct ipoib_neigh_table *ntbl = &priv->ntbl;
 	struct ipoib_neigh_hash *htbl;
 	unsigned long flags;
-	int i;
+	int i, wait_flushed = 0;
+
+	init_completion(&priv->ntbl.flushed);
 
 	write_lock_bh(&ntbl->rwlock);
 
@@ -1173,6 +1178,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 	if (!htbl)
 		goto out_unlock;
 
+	wait_flushed = atomic_read(&priv->ntbl.entries);
+	if (!wait_flushed)
+		goto free_htbl;
+
 	for (i = 0; i < htbl->size; i++) {
 		struct ipoib_neigh *neigh;
 		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
@@ -1190,11 +1199,14 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 		}
 	}
 
+free_htbl:
 	rcu_assign_pointer(ntbl->htbl, NULL);
 	call_rcu(&htbl->rcu, neigh_hash_free_rcu);
 
 out_unlock:
 	write_unlock_bh(&ntbl->rwlock);
+	if (wait_flushed)
+		wait_for_completion(&priv->ntbl.flushed);
 }
 
 static void ipoib_neigh_hash_uninit(struct net_device *dev)
@@ -1203,7 +1215,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 	int stopped;
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
-	init_completion(&priv->ntbl.flushed);
+	init_completion(&priv->ntbl.deleted);
 	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
 	/* Stop GC if called at init fail need to cancel work */
@@ -1211,10 +1223,9 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 	if (!stopped)
 		cancel_delayed_work(&priv->neigh_reap_task);
 
-	if (atomic_read(&priv->ntbl.entries)) {
-		ipoib_flush_neighs(priv);
-		wait_for_completion(&priv->ntbl.flushed);
-	}
+	ipoib_flush_neighs(priv);
+
+	wait_for_completion(&priv->ntbl.deleted);
 }
 
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours
       [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 1/3] IB/ipoib: Fix memory leak in the neigh table deletion flow Or Gerlitz
@ 2012-08-29 15:14   ` Or Gerlitz
       [not found]     ` <1346253275-12347-3-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 3/3] net/mlx4_core: Enable 8TB memory registration Or Gerlitz
  2012-09-08 19:48   ` [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6 Or Gerlitz
  3 siblings, 1 reply; 8+ messages in thread
From: Or Gerlitz @ 2012-08-29 15:14 UTC (permalink / raw)
  To: roland-DgEjT+Ai2ygdnm+yROfE0A
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Shlomo Pongratz, Or Gerlitz

From: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Lockdep pointed on a circular locking dependency betwwen the ipoib device
priv spinlock (priv->lock) and the neighbour table rwlock (ntbl->rwlock).

In normal path e.g. neigbour Garbage Collection task, the neigh table
rwlock is taken first and then if the neighbour need to be deleted,
priv->lock is taken.

However under error flow such as in ipoib_cm_handle_tx_wc, priv->lock is
taken first and then ipoib_neigh_free routine is called which in turn
takes the neighbour table ntbl->rwlock.

The combination would lead to deadlock when a normal flow runs in
parallel to error flow in two different CPUs. The solution was to
drop the neigh table rwlock and use only priv->lock.

Signed-off-by: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h           |    1 -
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |   78 +++++++++--------------
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |    2 -
 3 files changed, 31 insertions(+), 50 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 36b5940..d7927f3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -273,7 +273,6 @@ struct ipoib_neigh_hash {
 
 struct ipoib_neigh_table {
 	struct ipoib_neigh_hash __rcu  *htbl;
-	rwlock_t			rwlock;
 	atomic_t			entries;
 	struct completion		flushed;
 	struct completion		deleted;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 72c1fc2..ad7a456 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -546,15 +546,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
+	spin_lock_irqsave(&priv->lock, flags);
 	neigh = ipoib_neigh_alloc(daddr, dev);
 	if (!neigh) {
+		spin_unlock_irqrestore(&priv->lock, flags);
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 		return;
 	}
 
-	spin_lock_irqsave(&priv->lock, flags);
-
 	path = __path_find(dev, daddr + 4);
 	if (!path) {
 		path = path_rec_create(dev, daddr + 4);
@@ -863,10 +863,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
 		return;
 
-	write_lock_bh(&ntbl->rwlock);
+	spin_lock_irqsave(&priv->lock, flags);
 
 	htbl = rcu_dereference_protected(ntbl->htbl,
-					 lockdep_is_held(&ntbl->rwlock));
+					 lockdep_is_held(&priv->lock));
 
 	if (!htbl)
 		goto out_unlock;
@@ -883,16 +883,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
 
 		while ((neigh = rcu_dereference_protected(*np,
-							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+				       lockdep_is_held(&priv->lock))) != NULL) {
 			/* was the neigh idle for two GC periods */
 			if (time_after(neigh_obsolete, neigh->alive)) {
 				rcu_assign_pointer(*np,
-						   rcu_dereference_protected(neigh->hnext,
-									     lockdep_is_held(&ntbl->rwlock)));
+					rcu_dereference_protected(neigh->hnext,
+								  lockdep_is_held(&priv->lock)));
 				/* remove from path/mc list */
-				spin_lock_irqsave(&priv->lock, flags);
 				list_del(&neigh->list);
-				spin_unlock_irqrestore(&priv->lock, flags);
 				call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
 			} else {
 				np = &neigh->hnext;
@@ -902,7 +900,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 	}
 
 out_unlock:
-	write_unlock_bh(&ntbl->rwlock);
+	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
@@ -947,10 +945,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
 	struct ipoib_neigh *neigh;
 	u32 hash_val;
 
-	write_lock_bh(&ntbl->rwlock);
-
 	htbl = rcu_dereference_protected(ntbl->htbl,
-					 lockdep_is_held(&ntbl->rwlock));
+					 lockdep_is_held(&priv->lock));
 	if (!htbl) {
 		neigh = NULL;
 		goto out_unlock;
@@ -961,10 +957,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
 	 */
 	hash_val = ipoib_addr_hash(htbl, daddr);
 	for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
-					       lockdep_is_held(&ntbl->rwlock));
+					       lockdep_is_held(&priv->lock));
 	     neigh != NULL;
 	     neigh = rcu_dereference_protected(neigh->hnext,
-					       lockdep_is_held(&ntbl->rwlock))) {
+					       lockdep_is_held(&priv->lock))) {
 		if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
 			/* found, take one ref on behalf of the caller */
 			if (!atomic_inc_not_zero(&neigh->refcnt)) {
@@ -987,12 +983,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
 	/* put in hash */
 	rcu_assign_pointer(neigh->hnext,
 			   rcu_dereference_protected(htbl->buckets[hash_val],
-						     lockdep_is_held(&ntbl->rwlock)));
+						lockdep_is_held(&priv->lock)));
 	rcu_assign_pointer(htbl->buckets[hash_val], neigh);
 	atomic_inc(&ntbl->entries);
 
 out_unlock:
-	write_unlock_bh(&ntbl->rwlock);
 
 	return neigh;
 }
@@ -1040,35 +1035,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
 	struct ipoib_neigh *n;
 	u32 hash_val;
 
-	write_lock_bh(&ntbl->rwlock);
-
 	htbl = rcu_dereference_protected(ntbl->htbl,
-					lockdep_is_held(&ntbl->rwlock));
+					lockdep_is_held(&priv->lock));
 	if (!htbl)
-		goto out_unlock;
+		return;
 
 	hash_val = ipoib_addr_hash(htbl, neigh->daddr);
 	np = &htbl->buckets[hash_val];
 	for (n = rcu_dereference_protected(*np,
-					    lockdep_is_held(&ntbl->rwlock));
+					    lockdep_is_held(&priv->lock));
 	     n != NULL;
 	     n = rcu_dereference_protected(*np,
-					lockdep_is_held(&ntbl->rwlock))) {
+					lockdep_is_held(&priv->lock))) {
 		if (n == neigh) {
 			/* found */
 			rcu_assign_pointer(*np,
-					   rcu_dereference_protected(neigh->hnext,
-								     lockdep_is_held(&ntbl->rwlock)));
+					rcu_dereference_protected(neigh->hnext,
+								  lockdep_is_held(&priv->lock)));
 			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
-			goto out_unlock;
+			return;
 		} else {
 			np = &n->hnext;
 		}
 	}
-
-out_unlock:
-	write_unlock_bh(&ntbl->rwlock);
-
 }
 
 static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
@@ -1080,7 +1069,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
 	clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 	ntbl->htbl = NULL;
-	rwlock_init(&ntbl->rwlock);
 	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
 	if (!htbl)
 		return -ENOMEM;
@@ -1128,10 +1116,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
 	int i;
 
 	/* remove all neigh connected to a given path or mcast */
-	write_lock_bh(&ntbl->rwlock);
+	spin_lock_irqsave(&priv->lock, flags);
 
 	htbl = rcu_dereference_protected(ntbl->htbl,
-					 lockdep_is_held(&ntbl->rwlock));
+					 lockdep_is_held(&priv->lock));
 
 	if (!htbl)
 		goto out_unlock;
@@ -1141,16 +1129,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
 		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
 
 		while ((neigh = rcu_dereference_protected(*np,
-							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+				       lockdep_is_held(&priv->lock))) != NULL) {
 			/* delete neighs belong to this parent */
 			if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
 				rcu_assign_pointer(*np,
-						   rcu_dereference_protected(neigh->hnext,
-									     lockdep_is_held(&ntbl->rwlock)));
+					rcu_dereference_protected(neigh->hnext,
+								  lockdep_is_held(&priv->lock)));
 				/* remove from parent list */
-				spin_lock_irqsave(&priv->lock, flags);
 				list_del(&neigh->list);
-				spin_unlock_irqrestore(&priv->lock, flags);
 				call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
 			} else {
 				np = &neigh->hnext;
@@ -1159,7 +1145,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
 		}
 	}
 out_unlock:
-	write_unlock_bh(&ntbl->rwlock);
+	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
@@ -1171,10 +1157,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 
 	init_completion(&priv->ntbl.flushed);
 
-	write_lock_bh(&ntbl->rwlock);
+	spin_lock_irqsave(&priv->lock, flags);
 
 	htbl = rcu_dereference_protected(ntbl->htbl,
-					lockdep_is_held(&ntbl->rwlock));
+					lockdep_is_held(&priv->lock));
 	if (!htbl)
 		goto out_unlock;
 
@@ -1187,14 +1173,12 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
 
 		while ((neigh = rcu_dereference_protected(*np,
-							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+				       lockdep_is_held(&priv->lock))) != NULL) {
 			rcu_assign_pointer(*np,
-					   rcu_dereference_protected(neigh->hnext,
-								     lockdep_is_held(&ntbl->rwlock)));
+					rcu_dereference_protected(neigh->hnext,
+								  lockdep_is_held(&priv->lock)));
 			/* remove from path/mc list */
-			spin_lock_irqsave(&priv->lock, flags);
 			list_del(&neigh->list);
-			spin_unlock_irqrestore(&priv->lock, flags);
 			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
 		}
 	}
@@ -1204,7 +1188,7 @@ free_htbl:
 	call_rcu(&htbl->rcu, neigh_hash_free_rcu);
 
 out_unlock:
-	write_unlock_bh(&ntbl->rwlock);
+	spin_unlock_irqrestore(&priv->lock, flags);
 	if (wait_flushed)
 		wait_for_completion(&priv->ntbl.flushed);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 13f4aa7..7536724 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -707,9 +707,7 @@ out:
 		neigh = ipoib_neigh_get(dev, daddr);
 		spin_lock_irqsave(&priv->lock, flags);
 		if (!neigh) {
-			spin_unlock_irqrestore(&priv->lock, flags);
 			neigh = ipoib_neigh_alloc(daddr, dev);
-			spin_lock_irqsave(&priv->lock, flags);
 			if (neigh) {
 				kref_get(&mcast->ah->ref);
 				neigh->ah	= mcast->ah;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH fixes/for-3.6 3/3] net/mlx4_core: Enable 8TB memory registration
       [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 1/3] IB/ipoib: Fix memory leak in the neigh table deletion flow Or Gerlitz
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours Or Gerlitz
@ 2012-08-29 15:14   ` Or Gerlitz
       [not found]     ` <1346253275-12347-4-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2012-09-08 19:48   ` [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6 Or Gerlitz
  3 siblings, 1 reply; 8+ messages in thread
From: Or Gerlitz @ 2012-08-29 15:14 UTC (permalink / raw)
  To: roland-DgEjT+Ai2ygdnm+yROfE0A
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Yishai Hadas, Or Gerlitz

From: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

This patch adds on the fixes done in commits 89dd86db78 "Allow large
mlx4_buddy bitmaps" and 3de819e6b "Fix integer overflow issues around
MTT table" such that finally memory registration of up to 8TB
(log_num_mtt=31) works fine.

It handle int overflows in few mlx4_table_yyy routines in icm.c by
using a u64 intermediate variable, and int/uint issues that caused
table indexes to become nagive by setting some variables to be u32
instead of int, these problems turned into crashes that came into
play once a user attempted to register > 512GB of RAM.

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/net/ethernet/mellanox/mlx4/icm.c |   30 ++++++++++++++++++------------
 drivers/net/ethernet/mellanox/mlx4/icm.h |   10 +++++-----
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index daf4179..3e91cb6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -227,9 +227,10 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 {
-	int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+	u32 i = (obj & (table->num_obj - 1)) /
+			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
 	int ret = 0;
 
 	mutex_lock(&table->mutex);
@@ -262,16 +263,18 @@ out:
 	return ret;
 }
 
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 {
-	int i;
+	u32 i;
+	u64 offset;
 
 	i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
 
 	mutex_lock(&table->mutex);
 
 	if (--table->icm[i]->refcount == 0) {
-		mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
+		offset = (u64)i * (u64)MLX4_TABLE_CHUNK_SIZE;
+		mlx4_UNMAP_ICM(dev, table->virt + offset,
 			       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
 		mlx4_free_icm(dev, table->icm[i], table->coherent);
 		table->icm[i] = NULL;
@@ -280,9 +283,11 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
 	mutex_unlock(&table->mutex);
 }
 
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
+			dma_addr_t *dma_handle)
 {
-	int idx, offset, dma_offset, i;
+	int offset, dma_offset, i;
+	u64 idx;
 	struct mlx4_icm_chunk *chunk;
 	struct mlx4_icm *icm;
 	struct page *page = NULL;
@@ -292,7 +297,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
 
 	mutex_lock(&table->mutex);
 
-	idx = (obj & (table->num_obj - 1)) * table->obj_size;
+	idx = (u64)(obj & (table->num_obj - 1)) * (u64)table->obj_size;
 	icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
 	dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
 
@@ -326,10 +331,11 @@ out:
 }
 
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
-			 int start, int end)
+			 u32 start, u32 end)
 {
 	int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
-	int i, err;
+	int err;
+	u32 i;
 
 	for (i = start; i <= end; i += inc) {
 		err = mlx4_table_get(dev, table, i);
@@ -349,9 +355,9 @@ fail:
 }
 
 void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
-			  int start, int end)
+			  u32 start, u32 end)
 {
-	int i;
+	u32 i;
 
 	for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
 		mlx4_table_put(dev, table, i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index a67744f..dee67fa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -71,17 +71,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
-			 int start, int end);
+			 u32 start, u32 end);
 void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
-			  int start, int end);
+			  u32 start, u32 end);
 int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			u64 virt, int obj_size,	u32 nobj, int reserved,
 			int use_lowmem, int use_coherent);
 void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle);
 
 static inline void mlx4_icm_first(struct mlx4_icm *icm,
 				  struct mlx4_icm_iter *iter)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6
       [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2012-08-29 15:14   ` [PATCH fixes/for-3.6 3/3] net/mlx4_core: Enable 8TB memory registration Or Gerlitz
@ 2012-09-08 19:48   ` Or Gerlitz
  3 siblings, 0 replies; 8+ messages in thread
From: Or Gerlitz @ 2012-09-08 19:48 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma, linux-kernel

On Wed, Aug 29, 2012 at 6:14 PM, Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:

> Hi Roland,
>
> This short series include two more fixes from Shlomo to the newly
> introduced IPoIB neighbour table, and a fix for Yishai that completes
> the support for memory registration of up to 8TB.

Roland,

AFAIK 3.6-rc5 should be out by tonight, these are fixes to known bugs
in there which I sent you eleven days ago and didn't get any comment,
could you get them in and/or react in some way?

thanks,

Or.

>
> Shlomo Pongratz (2):
>   IB/ipoib: Fix memory leak in the neigh table deletion flow
>   IB/ipoib: Fix AB-BA deadlock when deleting neighbours
>
> Yishai Hadas (1):
>   net/mlx4_core: Enable 8TB memory registration
>
>  drivers/infiniband/ulp/ipoib/ipoib.h           |    4 +-
>  drivers/infiniband/ulp/ipoib/ipoib_main.c      |  101 +++++++++++------------
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c |    2 -
>  drivers/net/ethernet/mellanox/mlx4/icm.c       |   30 ++++---
>  drivers/net/ethernet/mellanox/mlx4/icm.h       |   10 +-
>  5 files changed, 74 insertions(+), 73 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours
       [not found]     ` <1346253275-12347-3-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2012-09-12 16:23       ` Roland Dreier
       [not found]         ` <CAL1RGDUGuEE-5X10j6Y0B8fkSH1a1=NDARFYumwwmSjtE0WsJw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Roland Dreier @ 2012-09-12 16:23 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Shlomo Pongratz

thanks, applied this and 1/3

On Wed, Aug 29, 2012 at 8:14 AM, Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
>                 while ((neigh = rcu_dereference_protected(*np,
> -                                                         lockdep_is_held(&ntbl->rwlock))) != NULL) {
> +                                      lockdep_is_held(&priv->lock))) != NULL) {

please try to avoid messing up indentation when doing fixes like this.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours
       [not found]         ` <CAL1RGDUGuEE-5X10j6Y0B8fkSH1a1=NDARFYumwwmSjtE0WsJw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-09-13  5:30           ` Or Gerlitz
  0 siblings, 0 replies; 8+ messages in thread
From: Or Gerlitz @ 2012-09-13  5:30 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Shlomo Pongratz

On 12/09/2012 19:23, Roland Dreier wrote:
> thanks, applied this and 1/3

OK, what about 3/3, any issue with merging it? also, I don't see them 
applied in your kernnel.org
tree fixes branch nor for-next, are you going to push them to 3.6 soon?


>
> On Wed, Aug 29, 2012 at 8:14 AM, Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
>>                  while ((neigh = rcu_dereference_protected(*np,
>> -                                                         lockdep_is_held(&ntbl->rwlock))) != NULL) {
>> +                                      lockdep_is_held(&priv->lock))) != NULL) {
> please try to avoid messing up indentation when doing fixes like this.

will take care to do so
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH fixes/for-3.6 3/3] net/mlx4_core: Enable 8TB memory registration
       [not found]     ` <1346253275-12347-4-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2012-09-14  0:52       ` Roland Dreier
  0 siblings, 0 replies; 8+ messages in thread
From: Roland Dreier @ 2012-09-14  0:52 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Yishai Hadas

thanks, applied
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-09-14  0:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-29 15:14 [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6 Or Gerlitz
     [not found] ` <1346253275-12347-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2012-08-29 15:14   ` [PATCH fixes/for-3.6 1/3] IB/ipoib: Fix memory leak in the neigh table deletion flow Or Gerlitz
2012-08-29 15:14   ` [PATCH fixes/for-3.6 2/3] IB/ipoib: Fix AB-BA deadlock when deleting neighbours Or Gerlitz
     [not found]     ` <1346253275-12347-3-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2012-09-12 16:23       ` Roland Dreier
     [not found]         ` <CAL1RGDUGuEE-5X10j6Y0B8fkSH1a1=NDARFYumwwmSjtE0WsJw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-09-13  5:30           ` Or Gerlitz
2012-08-29 15:14   ` [PATCH fixes/for-3.6 3/3] net/mlx4_core: Enable 8TB memory registration Or Gerlitz
     [not found]     ` <1346253275-12347-4-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2012-09-14  0:52       ` Roland Dreier
2012-09-08 19:48   ` [PATCH fixes/for-3.6 0/3] few more IB fixes for 3.6 Or Gerlitz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).