netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
@ 2016-11-15 15:23 Xin Long
  2016-11-15 18:04 ` Neil Horman
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Xin Long @ 2016-11-15 15:23 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, Marcelo Ricardo Leitner, Neil Horman, Vlad Yasevich,
	Herbert Xu, phil

Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
to hash a node to one chain. If in one host thousands of assocs connect
to one server with the same lport and different laddrs (although it's
not a normal case), all the transports would be hashed into the same
chain.

It may cause to keep returning -EBUSY when inserting a new node, as the
chain is too long and sctp inserts a transport node in a loop, which
could even lead to system hangs there.

The new rhlist interface works for this case that there are many nodes
with the same key in one chain. It puts them into a list then makes this
list be as a node of the chain.

This patch is to replace rhashtable_ interface with rhltable_ interface.
Since a chain would not be too long and it would not return -EBUSY with
this fix when inserting a node, the reinsert loop is also removed here.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 include/net/sctp/sctp.h    |  2 +-
 include/net/sctp/structs.h |  4 +-
 net/sctp/associola.c       |  8 +++-
 net/sctp/input.c           | 93 ++++++++++++++++++++++++++--------------------
 net/sctp/socket.c          |  7 +---
 5 files changed, 64 insertions(+), 50 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 31acc3f..f0dcaeb 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc,
 			  struct sock *oldsk, struct sock *newsk);
 int sctp_transport_hashtable_init(void);
 void sctp_transport_hashtable_destroy(void);
-void sctp_hash_transport(struct sctp_transport *t);
+int sctp_hash_transport(struct sctp_transport *t);
 void sctp_unhash_transport(struct sctp_transport *t);
 struct sctp_transport *sctp_addrs_lookup_transport(
 				struct net *net,
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 11c3bf2..c5a2d83 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -124,7 +124,7 @@ extern struct sctp_globals {
 	/* This is the sctp port control hash.	*/
 	struct sctp_bind_hashbucket *port_hashtable;
 	/* This is the hash of all transports. */
-	struct rhashtable transport_hashtable;
+	struct rhltable transport_hashtable;
 
 	/* Sizes of above hashtables. */
 	int ep_hashsize;
@@ -762,7 +762,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
 struct sctp_transport {
 	/* A list of transports. */
 	struct list_head transports;
-	struct rhash_head node;
+	struct rhlist_head node;
 
 	/* Reference counting. */
 	atomic_t refcnt;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index f10d339..68428e1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	/* Set the peer's active state. */
 	peer->state = peer_state;
 
+	/* Add this peer into the transport hashtable */
+	if (sctp_hash_transport(peer)) {
+		sctp_transport_free(peer);
+		return NULL;
+	}
+
 	/* Attach the remote transport to our asoc.  */
 	list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
 	asoc->peer.transport_count++;
-	/* Add this peer into the transport hashtable */
-	sctp_hash_transport(peer);
 
 	/* If we do not yet have a primary path, set one.  */
 	if (!asoc->peer.primary_path) {
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a01a56e..458e506 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -790,10 +790,9 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
 
 /* rhashtable for transport */
 struct sctp_hash_cmp_arg {
-	const struct sctp_endpoint	*ep;
-	const union sctp_addr		*laddr;
-	const union sctp_addr		*paddr;
-	const struct net		*net;
+	const union sctp_addr	*paddr;
+	const struct net	*net;
+	u16			lport;
 };
 
 static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
 {
 	struct sctp_transport *t = (struct sctp_transport *)ptr;
 	const struct sctp_hash_cmp_arg *x = arg->key;
-	struct sctp_association *asoc;
 	int err = 1;
 
 	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
@@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
 	if (!sctp_transport_hold(t))
 		return err;
 
-	asoc = t->asoc;
-	if (!net_eq(sock_net(asoc->base.sk), x->net))
+	if (!net_eq(sock_net(t->asoc->base.sk), x->net))
+		goto out;
+	if (x->lport != htons(t->asoc->base.bind_addr.port))
 		goto out;
-	if (x->ep) {
-		if (x->ep != asoc->ep)
-			goto out;
-	} else {
-		if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
-			goto out;
-		if (!sctp_bind_addr_match(&asoc->base.bind_addr,
-					  x->laddr, sctp_sk(asoc->base.sk)))
-			goto out;
-	}
 
 	err = 0;
 out:
@@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
 	const struct sctp_hash_cmp_arg *x = data;
 	const union sctp_addr *paddr = x->paddr;
 	const struct net *net = x->net;
-	u16 lport;
+	u16 lport = x->lport;
 	u32 addr;
 
-	lport = x->ep ? htons(x->ep->base.bind_addr.port) :
-			x->laddr->v4.sin_port;
 	if (paddr->sa.sa_family == AF_INET6)
 		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
 	else
@@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
 
 int sctp_transport_hashtable_init(void)
 {
-	return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
+	return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
 }
 
 void sctp_transport_hashtable_destroy(void)
 {
-	rhashtable_destroy(&sctp_transport_hashtable);
+	rhltable_destroy(&sctp_transport_hashtable);
 }
 
-void sctp_hash_transport(struct sctp_transport *t)
+int sctp_hash_transport(struct sctp_transport *t)
 {
 	struct sctp_hash_cmp_arg arg;
+	int err;
 
 	if (t->asoc->temp)
-		return;
+		return 0;
 
-	arg.ep = t->asoc->ep;
-	arg.paddr = &t->ipaddr;
 	arg.net   = sock_net(t->asoc->base.sk);
+	arg.paddr = &t->ipaddr;
+	arg.lport = htons(t->asoc->base.bind_addr.port);
 
-reinsert:
-	if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
-					 &t->node, sctp_hash_params) == -EBUSY)
-		goto reinsert;
+	err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
+				  &t->node, sctp_hash_params);
+	if (err)
+		pr_err_once("insert transport fail, errno %d\n", err);
+
+	return err;
 }
 
 void sctp_unhash_transport(struct sctp_transport *t)
@@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
 	if (t->asoc->temp)
 		return;
 
-	rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
-			       sctp_hash_params);
+	rhltable_remove(&sctp_transport_hashtable, &t->node,
+			sctp_hash_params);
 }
 
+/* return a transport with holding it */
 struct sctp_transport *sctp_addrs_lookup_transport(
 				struct net *net,
 				const union sctp_addr *laddr,
 				const union sctp_addr *paddr)
 {
+	struct rhlist_head *tmp, *list;
+	struct sctp_transport *t;
 	struct sctp_hash_cmp_arg arg = {
-		.ep    = NULL,
-		.laddr = laddr,
 		.paddr = paddr,
 		.net   = net,
+		.lport = laddr->v4.sin_port,
 	};
 
-	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
-				      sctp_hash_params);
+	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+			       sctp_hash_params);
+
+	rhl_for_each_entry_rcu(t, tmp, list, node) {
+		if (!sctp_transport_hold(t))
+			continue;
+
+		if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
+					 laddr, sctp_sk(t->asoc->base.sk)))
+			return t;
+		sctp_transport_put(t);
+	}
+
+	return NULL;
 }
 
+/* return a transport without holding it, as it's only used under sock lock */
 struct sctp_transport *sctp_epaddr_lookup_transport(
 				const struct sctp_endpoint *ep,
 				const union sctp_addr *paddr)
 {
 	struct net *net = sock_net(ep->base.sk);
+	struct rhlist_head *tmp, *list;
+	struct sctp_transport *t;
 	struct sctp_hash_cmp_arg arg = {
-		.ep    = ep,
 		.paddr = paddr,
 		.net   = net,
+		.lport = htons(ep->base.bind_addr.port),
 	};
 
-	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
-				      sctp_hash_params);
+	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+			       sctp_hash_params);
+
+	rhl_for_each_entry_rcu(t, tmp, list, node)
+		if (ep == t->asoc->ep)
+			return t;
+
+	return NULL;
 }
 
 /* Look up an association. */
@@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association(
 	struct sctp_association *asoc = NULL;
 
 	t = sctp_addrs_lookup_transport(net, local, peer);
-	if (!t || !sctp_transport_hold(t))
+	if (!t)
 		goto out;
 
 	asoc = t->asoc;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f23ad91..d5f4b4a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4392,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
 {
 	int err;
 
-	err = rhashtable_walk_init(&sctp_transport_hashtable, iter,
-				   GFP_KERNEL);
-	if (err)
-		return err;
+	rhltable_walk_enter(&sctp_transport_hashtable, iter);
 
 	err = rhashtable_walk_start(iter);
 	if (err && err != -EAGAIN) {
@@ -4479,7 +4476,7 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 
 	rcu_read_lock();
 	transport = sctp_addrs_lookup_transport(net, laddr, paddr);
-	if (!transport || !sctp_transport_hold(transport))
+	if (!transport)
 		goto out;
 
 	rcu_read_unlock();
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
  2016-11-15 15:23 [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable Xin Long
@ 2016-11-15 18:04 ` Neil Horman
  2016-11-16 13:34   ` Xin Long
  2016-11-16 19:17 ` Marcelo Ricardo Leitner
  2016-11-17  4:22 ` David Miller
  2 siblings, 1 reply; 6+ messages in thread
From: Neil Horman @ 2016-11-15 18:04 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, linux-sctp, davem, Marcelo Ricardo Leitner,
	Vlad Yasevich, Herbert Xu, phil

On Tue, Nov 15, 2016 at 11:23:11PM +0800, Xin Long wrote:
> Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
> to hash a node to one chain. If in one host thousands of assocs connect
> to one server with the same lport and different laddrs (although it's
> not a normal case), all the transports would be hashed into the same
> chain.
> 
> It may cause to keep returning -EBUSY when inserting a new node, as the
> chain is too long and sctp inserts a transport node in a loop, which
> could even lead to system hangs there.
> 
> The new rhlist interface works for this case that there are many nodes
> with the same key in one chain. It puts them into a list then makes this
> list be as a node of the chain.
> 
> This patch is to replace rhashtable_ interface with rhltable_ interface.
> Since a chain would not be too long and it would not return -EBUSY with
> this fix when inserting a node, the reinsert loop is also removed here.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Does this really buy us anything in this case though?  If the use case is that a
majority of the associations map to the same key, then you might avoid EBUSY for
the individual associaion that doesn't map there, but you still have to cope
with a huge linear search for the majority of the keys.

Might be more reasonable to mix saddr into the hash function so that your use
case gets spread through the hash table more evenly.

Neil

> ---
>  include/net/sctp/sctp.h    |  2 +-
>  include/net/sctp/structs.h |  4 +-
>  net/sctp/associola.c       |  8 +++-
>  net/sctp/input.c           | 93 ++++++++++++++++++++++++++--------------------
>  net/sctp/socket.c          |  7 +---
>  5 files changed, 64 insertions(+), 50 deletions(-)
> 
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index 31acc3f..f0dcaeb 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc,
>  			  struct sock *oldsk, struct sock *newsk);
>  int sctp_transport_hashtable_init(void);
>  void sctp_transport_hashtable_destroy(void);
> -void sctp_hash_transport(struct sctp_transport *t);
> +int sctp_hash_transport(struct sctp_transport *t);
>  void sctp_unhash_transport(struct sctp_transport *t);
>  struct sctp_transport *sctp_addrs_lookup_transport(
>  				struct net *net,
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index 11c3bf2..c5a2d83 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -124,7 +124,7 @@ extern struct sctp_globals {
>  	/* This is the sctp port control hash.	*/
>  	struct sctp_bind_hashbucket *port_hashtable;
>  	/* This is the hash of all transports. */
> -	struct rhashtable transport_hashtable;
> +	struct rhltable transport_hashtable;
>  
>  	/* Sizes of above hashtables. */
>  	int ep_hashsize;
> @@ -762,7 +762,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
>  struct sctp_transport {
>  	/* A list of transports. */
>  	struct list_head transports;
> -	struct rhash_head node;
> +	struct rhlist_head node;
>  
>  	/* Reference counting. */
>  	atomic_t refcnt;
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index f10d339..68428e1 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
>  	/* Set the peer's active state. */
>  	peer->state = peer_state;
>  
> +	/* Add this peer into the transport hashtable */
> +	if (sctp_hash_transport(peer)) {
> +		sctp_transport_free(peer);
> +		return NULL;
> +	}
> +
>  	/* Attach the remote transport to our asoc.  */
>  	list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
>  	asoc->peer.transport_count++;
> -	/* Add this peer into the transport hashtable */
> -	sctp_hash_transport(peer);
>  
>  	/* If we do not yet have a primary path, set one.  */
>  	if (!asoc->peer.primary_path) {
> diff --git a/net/sctp/input.c b/net/sctp/input.c
> index a01a56e..458e506 100644
> --- a/net/sctp/input.c
> +++ b/net/sctp/input.c
> @@ -790,10 +790,9 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
>  
>  /* rhashtable for transport */
>  struct sctp_hash_cmp_arg {
> -	const struct sctp_endpoint	*ep;
> -	const union sctp_addr		*laddr;
> -	const union sctp_addr		*paddr;
> -	const struct net		*net;
> +	const union sctp_addr	*paddr;
> +	const struct net	*net;
> +	u16			lport;
>  };
>  
>  static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
> @@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
>  {
>  	struct sctp_transport *t = (struct sctp_transport *)ptr;
>  	const struct sctp_hash_cmp_arg *x = arg->key;
> -	struct sctp_association *asoc;
>  	int err = 1;
>  
>  	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
> @@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
>  	if (!sctp_transport_hold(t))
>  		return err;
>  
> -	asoc = t->asoc;
> -	if (!net_eq(sock_net(asoc->base.sk), x->net))
> +	if (!net_eq(sock_net(t->asoc->base.sk), x->net))
> +		goto out;
> +	if (x->lport != htons(t->asoc->base.bind_addr.port))
>  		goto out;
> -	if (x->ep) {
> -		if (x->ep != asoc->ep)
> -			goto out;
> -	} else {
> -		if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
> -			goto out;
> -		if (!sctp_bind_addr_match(&asoc->base.bind_addr,
> -					  x->laddr, sctp_sk(asoc->base.sk)))
> -			goto out;
> -	}
>  
>  	err = 0;
>  out:
> @@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
>  	const struct sctp_hash_cmp_arg *x = data;
>  	const union sctp_addr *paddr = x->paddr;
>  	const struct net *net = x->net;
> -	u16 lport;
> +	u16 lport = x->lport;
>  	u32 addr;
>  
> -	lport = x->ep ? htons(x->ep->base.bind_addr.port) :
> -			x->laddr->v4.sin_port;
>  	if (paddr->sa.sa_family == AF_INET6)
>  		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
>  	else
> @@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
>  
>  int sctp_transport_hashtable_init(void)
>  {
> -	return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
> +	return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
>  }
>  
>  void sctp_transport_hashtable_destroy(void)
>  {
> -	rhashtable_destroy(&sctp_transport_hashtable);
> +	rhltable_destroy(&sctp_transport_hashtable);
>  }
>  
> -void sctp_hash_transport(struct sctp_transport *t)
> +int sctp_hash_transport(struct sctp_transport *t)
>  {
>  	struct sctp_hash_cmp_arg arg;
> +	int err;
>  
>  	if (t->asoc->temp)
> -		return;
> +		return 0;
>  
> -	arg.ep = t->asoc->ep;
> -	arg.paddr = &t->ipaddr;
>  	arg.net   = sock_net(t->asoc->base.sk);
> +	arg.paddr = &t->ipaddr;
> +	arg.lport = htons(t->asoc->base.bind_addr.port);
>  
> -reinsert:
> -	if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
> -					 &t->node, sctp_hash_params) == -EBUSY)
> -		goto reinsert;
> +	err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
> +				  &t->node, sctp_hash_params);
> +	if (err)
> +		pr_err_once("insert transport fail, errno %d\n", err);
> +
> +	return err;
>  }
>  
>  void sctp_unhash_transport(struct sctp_transport *t)
> @@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
>  	if (t->asoc->temp)
>  		return;
>  
> -	rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
> -			       sctp_hash_params);
> +	rhltable_remove(&sctp_transport_hashtable, &t->node,
> +			sctp_hash_params);
>  }
>  
> +/* return a transport with holding it */
>  struct sctp_transport *sctp_addrs_lookup_transport(
>  				struct net *net,
>  				const union sctp_addr *laddr,
>  				const union sctp_addr *paddr)
>  {
> +	struct rhlist_head *tmp, *list;
> +	struct sctp_transport *t;
>  	struct sctp_hash_cmp_arg arg = {
> -		.ep    = NULL,
> -		.laddr = laddr,
>  		.paddr = paddr,
>  		.net   = net,
> +		.lport = laddr->v4.sin_port,
>  	};
>  
> -	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
> -				      sctp_hash_params);
> +	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
> +			       sctp_hash_params);
> +
> +	rhl_for_each_entry_rcu(t, tmp, list, node) {
> +		if (!sctp_transport_hold(t))
> +			continue;
> +
> +		if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
> +					 laddr, sctp_sk(t->asoc->base.sk)))
> +			return t;
> +		sctp_transport_put(t);
> +	}
> +
> +	return NULL;
>  }
>  
> +/* return a transport without holding it, as it's only used under sock lock */
>  struct sctp_transport *sctp_epaddr_lookup_transport(
>  				const struct sctp_endpoint *ep,
>  				const union sctp_addr *paddr)
>  {
>  	struct net *net = sock_net(ep->base.sk);
> +	struct rhlist_head *tmp, *list;
> +	struct sctp_transport *t;
>  	struct sctp_hash_cmp_arg arg = {
> -		.ep    = ep,
>  		.paddr = paddr,
>  		.net   = net,
> +		.lport = htons(ep->base.bind_addr.port),
>  	};
>  
> -	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
> -				      sctp_hash_params);
> +	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
> +			       sctp_hash_params);
> +
> +	rhl_for_each_entry_rcu(t, tmp, list, node)
> +		if (ep == t->asoc->ep)
> +			return t;
> +
> +	return NULL;
>  }
>  
>  /* Look up an association. */
> @@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association(
>  	struct sctp_association *asoc = NULL;
>  
>  	t = sctp_addrs_lookup_transport(net, local, peer);
> -	if (!t || !sctp_transport_hold(t))
> +	if (!t)
>  		goto out;
>  
>  	asoc = t->asoc;
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index f23ad91..d5f4b4a 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -4392,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
>  {
>  	int err;
>  
> -	err = rhashtable_walk_init(&sctp_transport_hashtable, iter,
> -				   GFP_KERNEL);
> -	if (err)
> -		return err;
> +	rhltable_walk_enter(&sctp_transport_hashtable, iter);
>  
>  	err = rhashtable_walk_start(iter);
>  	if (err && err != -EAGAIN) {
> @@ -4479,7 +4476,7 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
>  
>  	rcu_read_lock();
>  	transport = sctp_addrs_lookup_transport(net, laddr, paddr);
> -	if (!transport || !sctp_transport_hold(transport))
> +	if (!transport)
>  		goto out;
>  
>  	rcu_read_unlock();
> -- 
> 2.1.0
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
  2016-11-15 18:04 ` Neil Horman
@ 2016-11-16 13:34   ` Xin Long
  2016-11-16 19:09     ` Neil Horman
  0 siblings, 1 reply; 6+ messages in thread
From: Xin Long @ 2016-11-16 13:34 UTC (permalink / raw)
  To: Neil Horman
  Cc: network dev, linux-sctp, davem, Marcelo Ricardo Leitner,
	Vlad Yasevich, Herbert Xu, Phil Sutter

On Wed, Nov 16, 2016 at 2:04 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Tue, Nov 15, 2016 at 11:23:11PM +0800, Xin Long wrote:
>> Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
>> to hash a node to one chain. If in one host thousands of assocs connect
>> to one server with the same lport and different laddrs (although it's
>> not a normal case), all the transports would be hashed into the same
>> chain.
>>
>> It may cause to keep returning -EBUSY when inserting a new node, as the
>> chain is too long and sctp inserts a transport node in a loop, which
>> could even lead to system hangs there.
>>
>> The new rhlist interface works for this case that there are many nodes
>> with the same key in one chain. It puts them into a list then makes this
>> list be as a node of the chain.
>>
>> This patch is to replace rhashtable_ interface with rhltable_ interface.
>> Since a chain would not be too long and it would not return -EBUSY with
>> this fix when inserting a node, the reinsert loop is also removed here.
>>
>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>
> Does this really buy us anything in this case though?  If the use case is that a
> majority of the associations map to the same key, then you might avoid EBUSY for
> the individual associaion that doesn't map there, but you still have to cope
> with a huge linear search for the majority of the keys.
>

This patch is NOT for improving performance, it is to reorganize
transports in rhashtable in another way to avoid EBUSY, rhlist is born
for this.

Before this patch, the transport insert codes are pretty bad, if it returns
EBUSY, it would retry in a loop. now this patch avoid this and even
removed that loop, it's a fix for this issue.

> Might be more reasonable to mix saddr into the hash function so that your use
> case gets spread through the hash table more evenly.

we can not do this:
1. it will increase rhashtable's size when using multihome, if a host has
    N addrs, the size for one assoc will be N times bigger than now.
2. the hash node is inside transport, if we mix saddrs, when using multihome
    one transport would be hashed many times with different saddrs, we
    would have to define new structure to link transport.
we do not need to do this:
1. as the changelog said, "it's not a normal case", in one host (client), it
shouldn't connect to the same server with different saddrs.
2. now as long as paddr+dport+lport are different, rhashtable can hash
it evenly.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
  2016-11-16 13:34   ` Xin Long
@ 2016-11-16 19:09     ` Neil Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Neil Horman @ 2016-11-16 19:09 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, linux-sctp, davem, Marcelo Ricardo Leitner,
	Vlad Yasevich, Herbert Xu, Phil Sutter

On Wed, Nov 16, 2016 at 09:34:52PM +0800, Xin Long wrote:
> On Wed, Nov 16, 2016 at 2:04 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > On Tue, Nov 15, 2016 at 11:23:11PM +0800, Xin Long wrote:
> >> Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
> >> to hash a node to one chain. If in one host thousands of assocs connect
> >> to one server with the same lport and different laddrs (although it's
> >> not a normal case), all the transports would be hashed into the same
> >> chain.
> >>
> >> It may cause to keep returning -EBUSY when inserting a new node, as the
> >> chain is too long and sctp inserts a transport node in a loop, which
> >> could even lead to system hangs there.
> >>
> >> The new rhlist interface works for this case that there are many nodes
> >> with the same key in one chain. It puts them into a list then makes this
> >> list be as a node of the chain.
> >>
> >> This patch is to replace rhashtable_ interface with rhltable_ interface.
> >> Since a chain would not be too long and it would not return -EBUSY with
> >> this fix when inserting a node, the reinsert loop is also removed here.
> >>
> >> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >
> > Does this really buy us anything in this case though?  If the use case is that a
> > majority of the associations map to the same key, then you might avoid EBUSY for
> > the individual associaion that doesn't map there, but you still have to cope
> > with a huge linear search for the majority of the keys.
> >
> 
> This patch is NOT for improving performance, it is to reorganize
> transports in rhashtable in another way to avoid EBUSY, rhlist is born
> for this.
> 
Never said it was a performance issue, just suggested that avoiding EBUSY
returns on inserts might be handled in other ways.

> Before this patch, the transport insert codes are pretty bad, if it returns
> EBUSY, it would retry in a loop. now this patch avoid this and even
> removed that loop, it's a fix for this issue.
> 
> > Might be more reasonable to mix saddr into the hash function so that your use
> > case gets spread through the hash table more evenly.
> 
> we can not do this:
> 1. it will increase rhashtable's size when using multihome, if a host has
>     N addrs, the size for one assoc will be N times bigger than now.
> 2. the hash node is inside transport, if we mix saddrs, when using multihome
>     one transport would be hashed many times with different saddrs, we
>     would have to define new structure to link transport.
> we do not need to do this:
> 1. as the changelog said, "it's not a normal case", in one host (client), it
> shouldn't connect to the same server with different saddrs.
> 2. now as long as paddr+dport+lport are different, rhashtable can hash
> it evenly.

Its the 'not a normal case' thats getting me.  Making a non-trivial change like
this for a corner use case typically makes me suspcious, but your points
regarding multiple hash entries being needed when saddr is used in a multihome
scenario make sense to me.  And looking at the rhltable code more closely, I
think this makes more sense

Acked-by: Neil Horman <nhorman@tuxdriver.com>

> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
  2016-11-15 15:23 [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable Xin Long
  2016-11-15 18:04 ` Neil Horman
@ 2016-11-16 19:17 ` Marcelo Ricardo Leitner
  2016-11-17  4:22 ` David Miller
  2 siblings, 0 replies; 6+ messages in thread
From: Marcelo Ricardo Leitner @ 2016-11-16 19:17 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, linux-sctp, davem, Neil Horman, Vlad Yasevich,
	Herbert Xu, phil

On Tue, Nov 15, 2016 at 11:23:11PM +0800, Xin Long wrote:
> Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
> to hash a node to one chain. If in one host thousands of assocs connect
> to one server with the same lport and different laddrs (although it's
> not a normal case), all the transports would be hashed into the same
> chain.
> 
> It may cause to keep returning -EBUSY when inserting a new node, as the
> chain is too long and sctp inserts a transport node in a loop, which
> could even lead to system hangs there.
> 
> The new rhlist interface works for this case that there are many nodes
> with the same key in one chain. It puts them into a list then makes this
> list be as a node of the chain.
> 
> This patch is to replace rhashtable_ interface with rhltable_ interface.
> Since a chain would not be too long and it would not return -EBUSY with
> this fix when inserting a node, the reinsert loop is also removed here.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
>  include/net/sctp/sctp.h    |  2 +-
>  include/net/sctp/structs.h |  4 +-
>  net/sctp/associola.c       |  8 +++-
>  net/sctp/input.c           | 93 ++++++++++++++++++++++++++--------------------
>  net/sctp/socket.c          |  7 +---
>  5 files changed, 64 insertions(+), 50 deletions(-)
> 
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index 31acc3f..f0dcaeb 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc,
>  			  struct sock *oldsk, struct sock *newsk);
>  int sctp_transport_hashtable_init(void);
>  void sctp_transport_hashtable_destroy(void);
> -void sctp_hash_transport(struct sctp_transport *t);
> +int sctp_hash_transport(struct sctp_transport *t);
>  void sctp_unhash_transport(struct sctp_transport *t);
>  struct sctp_transport *sctp_addrs_lookup_transport(
>  				struct net *net,
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index 11c3bf2..c5a2d83 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -124,7 +124,7 @@ extern struct sctp_globals {
>  	/* This is the sctp port control hash.	*/
>  	struct sctp_bind_hashbucket *port_hashtable;
>  	/* This is the hash of all transports. */
> -	struct rhashtable transport_hashtable;
> +	struct rhltable transport_hashtable;
>  
>  	/* Sizes of above hashtables. */
>  	int ep_hashsize;
> @@ -762,7 +762,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
>  struct sctp_transport {
>  	/* A list of transports. */
>  	struct list_head transports;
> -	struct rhash_head node;
> +	struct rhlist_head node;
>  
>  	/* Reference counting. */
>  	atomic_t refcnt;
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index f10d339..68428e1 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
>  	/* Set the peer's active state. */
>  	peer->state = peer_state;
>  
> +	/* Add this peer into the transport hashtable */
> +	if (sctp_hash_transport(peer)) {
> +		sctp_transport_free(peer);
> +		return NULL;
> +	}
> +
>  	/* Attach the remote transport to our asoc.  */
>  	list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
>  	asoc->peer.transport_count++;
> -	/* Add this peer into the transport hashtable */
> -	sctp_hash_transport(peer);
>  
>  	/* If we do not yet have a primary path, set one.  */
>  	if (!asoc->peer.primary_path) {
> diff --git a/net/sctp/input.c b/net/sctp/input.c
> index a01a56e..458e506 100644
> --- a/net/sctp/input.c
> +++ b/net/sctp/input.c
> @@ -790,10 +790,9 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
>  
>  /* rhashtable for transport */
>  struct sctp_hash_cmp_arg {
> -	const struct sctp_endpoint	*ep;
> -	const union sctp_addr		*laddr;
> -	const union sctp_addr		*paddr;
> -	const struct net		*net;
> +	const union sctp_addr	*paddr;
> +	const struct net	*net;
> +	u16			lport;
>  };
>  
>  static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
> @@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
>  {
>  	struct sctp_transport *t = (struct sctp_transport *)ptr;
>  	const struct sctp_hash_cmp_arg *x = arg->key;
> -	struct sctp_association *asoc;
>  	int err = 1;
>  
>  	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
> @@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
>  	if (!sctp_transport_hold(t))
>  		return err;
>  
> -	asoc = t->asoc;
> -	if (!net_eq(sock_net(asoc->base.sk), x->net))
> +	if (!net_eq(sock_net(t->asoc->base.sk), x->net))
> +		goto out;
> +	if (x->lport != htons(t->asoc->base.bind_addr.port))
>  		goto out;
> -	if (x->ep) {
> -		if (x->ep != asoc->ep)
> -			goto out;
> -	} else {
> -		if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
> -			goto out;
> -		if (!sctp_bind_addr_match(&asoc->base.bind_addr,
> -					  x->laddr, sctp_sk(asoc->base.sk)))
> -			goto out;
> -	}
>  
>  	err = 0;
>  out:
> @@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
>  	const struct sctp_hash_cmp_arg *x = data;
>  	const union sctp_addr *paddr = x->paddr;
>  	const struct net *net = x->net;
> -	u16 lport;
> +	u16 lport = x->lport;
>  	u32 addr;
>  
> -	lport = x->ep ? htons(x->ep->base.bind_addr.port) :
> -			x->laddr->v4.sin_port;
>  	if (paddr->sa.sa_family == AF_INET6)
>  		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
>  	else
> @@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
>  
>  int sctp_transport_hashtable_init(void)
>  {
> -	return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
> +	return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
>  }
>  
>  void sctp_transport_hashtable_destroy(void)
>  {
> -	rhashtable_destroy(&sctp_transport_hashtable);
> +	rhltable_destroy(&sctp_transport_hashtable);
>  }
>  
> -void sctp_hash_transport(struct sctp_transport *t)
> +int sctp_hash_transport(struct sctp_transport *t)
>  {
>  	struct sctp_hash_cmp_arg arg;
> +	int err;
>  
>  	if (t->asoc->temp)
> -		return;
> +		return 0;
>  
> -	arg.ep = t->asoc->ep;
> -	arg.paddr = &t->ipaddr;
>  	arg.net   = sock_net(t->asoc->base.sk);
> +	arg.paddr = &t->ipaddr;
> +	arg.lport = htons(t->asoc->base.bind_addr.port);
>  
> -reinsert:
> -	if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
> -					 &t->node, sctp_hash_params) == -EBUSY)
> -		goto reinsert;
> +	err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
> +				  &t->node, sctp_hash_params);
> +	if (err)
> +		pr_err_once("insert transport fail, errno %d\n", err);
> +
> +	return err;
>  }
>  
>  void sctp_unhash_transport(struct sctp_transport *t)
> @@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
>  	if (t->asoc->temp)
>  		return;
>  
> -	rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
> -			       sctp_hash_params);
> +	rhltable_remove(&sctp_transport_hashtable, &t->node,
> +			sctp_hash_params);
>  }
>  
> +/* return a transport with holding it */
>  struct sctp_transport *sctp_addrs_lookup_transport(
>  				struct net *net,
>  				const union sctp_addr *laddr,
>  				const union sctp_addr *paddr)
>  {
> +	struct rhlist_head *tmp, *list;
> +	struct sctp_transport *t;
>  	struct sctp_hash_cmp_arg arg = {
> -		.ep    = NULL,
> -		.laddr = laddr,
>  		.paddr = paddr,
>  		.net   = net,
> +		.lport = laddr->v4.sin_port,
>  	};
>  
> -	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
> -				      sctp_hash_params);
> +	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
> +			       sctp_hash_params);
> +
> +	rhl_for_each_entry_rcu(t, tmp, list, node) {
> +		if (!sctp_transport_hold(t))
> +			continue;
> +
> +		if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
> +					 laddr, sctp_sk(t->asoc->base.sk)))
> +			return t;
> +		sctp_transport_put(t);
> +	}
> +
> +	return NULL;
>  }
>  
> +/* return a transport without holding it, as it's only used under sock lock */
>  struct sctp_transport *sctp_epaddr_lookup_transport(
>  				const struct sctp_endpoint *ep,
>  				const union sctp_addr *paddr)
>  {
>  	struct net *net = sock_net(ep->base.sk);
> +	struct rhlist_head *tmp, *list;
> +	struct sctp_transport *t;
>  	struct sctp_hash_cmp_arg arg = {
> -		.ep    = ep,
>  		.paddr = paddr,
>  		.net   = net,
> +		.lport = htons(ep->base.bind_addr.port),
>  	};
>  
> -	return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
> -				      sctp_hash_params);
> +	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
> +			       sctp_hash_params);
> +
> +	rhl_for_each_entry_rcu(t, tmp, list, node)
> +		if (ep == t->asoc->ep)
> +			return t;
> +
> +	return NULL;
>  }
>  
>  /* Look up an association. */
> @@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association(
>  	struct sctp_association *asoc = NULL;
>  
>  	t = sctp_addrs_lookup_transport(net, local, peer);
> -	if (!t || !sctp_transport_hold(t))
> +	if (!t)
>  		goto out;
>  
>  	asoc = t->asoc;
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index f23ad91..d5f4b4a 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -4392,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
>  {
>  	int err;
>  
> -	err = rhashtable_walk_init(&sctp_transport_hashtable, iter,
> -				   GFP_KERNEL);
> -	if (err)
> -		return err;
> +	rhltable_walk_enter(&sctp_transport_hashtable, iter);
>  
>  	err = rhashtable_walk_start(iter);
>  	if (err && err != -EAGAIN) {
> @@ -4479,7 +4476,7 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
>  
>  	rcu_read_lock();
>  	transport = sctp_addrs_lookup_transport(net, laddr, paddr);
> -	if (!transport || !sctp_transport_hold(transport))
> +	if (!transport)
>  		goto out;
>  
>  	rcu_read_unlock();
> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable
  2016-11-15 15:23 [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable Xin Long
  2016-11-15 18:04 ` Neil Horman
  2016-11-16 19:17 ` Marcelo Ricardo Leitner
@ 2016-11-17  4:22 ` David Miller
  2 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2016-11-17  4:22 UTC (permalink / raw)
  To: lucien.xin
  Cc: netdev, linux-sctp, marcelo.leitner, nhorman, vyasevich, herbert,
	phil

From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 15 Nov 2016 23:23:11 +0800

> Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key
> to hash a node to one chain. If in one host thousands of assocs connect
> to one server with the same lport and different laddrs (although it's
> not a normal case), all the transports would be hashed into the same
> chain.
> 
> It may cause to keep returning -EBUSY when inserting a new node, as the
> chain is too long and sctp inserts a transport node in a loop, which
> could even lead to system hangs there.
> 
> The new rhlist interface works for this case that there are many nodes
> with the same key in one chain. It puts them into a list then makes this
> list be as a node of the chain.
> 
> This patch is to replace rhashtable_ interface with rhltable_ interface.
> Since a chain would not be too long and it would not return -EBUSY with
> this fix when inserting a node, the reinsert loop is also removed here.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Applied to net-next, thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-11-17  4:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-11-15 15:23 [PATCH net] sctp: use new rhlist interface on sctp transport rhashtable Xin Long
2016-11-15 18:04 ` Neil Horman
2016-11-16 13:34   ` Xin Long
2016-11-16 19:09     ` Neil Horman
2016-11-16 19:17 ` Marcelo Ricardo Leitner
2016-11-17  4:22 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).