* [PATCH 1/3] inetpeer: Support ipv6 addresses.
@ 2010-11-29 21:44 David Miller
2010-11-29 22:33 ` Stephen Hemminger
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: David Miller @ 2010-11-29 21:44 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/inetpeer.h | 21 ++++++++++++++-
net/ipv4/inetpeer.c | 66 ++++++++++++++++++++++++++++++++++++++++-------
net/ipv4/ip_fragment.c | 2 +-
net/ipv4/route.c | 2 +-
net/ipv4/tcp_ipv4.c | 6 ++--
5 files changed, 80 insertions(+), 17 deletions(-)
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index fe239bf..834f045 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -13,10 +13,18 @@
#include <linux/spinlock.h>
#include <asm/atomic.h>
+typedef struct {
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ };
+ __u16 family;
+} inet_peer_address_t;
+
struct inet_peer {
/* group together avl_left,avl_right,v4daddr to speedup lookups */
struct inet_peer __rcu *avl_left, *avl_right;
- __be32 v4daddr; /* peer's address */
+ inet_peer_address_t daddr;
__u32 avl_height;
struct list_head unused;
__u32 dtime; /* the time of last use of not
@@ -42,7 +50,16 @@ struct inet_peer {
void inet_initpeers(void) __init;
/* can be called with or without local BH being disabled */
-struct inet_peer *inet_getpeer(__be32 daddr, int create);
+struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create);
+
+static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
+{
+ inet_peer_address_t daddr;
+
+ daddr.a4 = v4daddr;
+ daddr.family = AF_INET;
+ return inet_getpeer(&daddr, create);
+}
/* can be called from BH context or outside */
extern void inet_putpeer(struct inet_peer *p);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9e94d7c..39f18c5 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
#include <linux/net.h>
#include <net/ip.h>
#include <net/inetpeer.h>
+#include <net/ipv6.h>
/*
* Theory of operations.
@@ -152,11 +153,53 @@ static void unlink_from_unused(struct inet_peer *p)
}
}
+static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
+{
+ if (a->family == b->family) {
+ switch (a->family) {
+ case AF_INET:
+ if (a->a4 == b->a4)
+ return true;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_cmp((struct in6_addr *)a,
+ (struct in6_addr *)b))
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+static inline u32 inet_peer_key(inet_peer_address_t *a)
+{
+ u32 key;
+
+ switch (a->family) {
+ case AF_INET:
+ key = (__force __u32) a->a4;
+ break;
+ case AF_INET6:
+ key = ((__force __u32)a->a6[0] ^
+ (__force __u32)a->a6[1] ^
+ (__force __u32)a->a6[2] ^
+ (__force __u32)a->a6[3]);
+ break;
+ default:
+ key = 0;
+ break;
+ }
+ return key;
+}
+
/*
* Called with local BH disabled and the pool lock held.
*/
#define lookup(_daddr, _stack) \
({ \
+ u32 key = inet_peer_key(_daddr); \
struct inet_peer *u; \
struct inet_peer __rcu **v; \
\
@@ -165,9 +208,9 @@ static void unlink_from_unused(struct inet_peer *p)
for (u = rcu_dereference_protected(peers.root, \
lockdep_is_held(&peers.lock)); \
u != peer_avl_empty; ) { \
- if (_daddr == u->v4daddr) \
+ if (inet_peer_addr_equal(_daddr, &u->daddr)) \
break; \
- if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
+ if (key < inet_peer_key(&u->daddr)) \
v = &u->avl_left; \
else \
v = &u->avl_right; \
@@ -185,13 +228,14 @@ static void unlink_from_unused(struct inet_peer *p)
* But every pointer we follow is guaranteed to be valid thanks to RCU.
* We exit from this function if number of links exceeds PEER_MAXDEPTH
*/
-static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+static struct inet_peer *lookup_rcu_bh(inet_peer_address_t *daddr)
{
struct inet_peer *u = rcu_dereference_bh(peers.root);
+ u32 key = inet_peer_key(daddr);
int count = 0;
while (u != peer_avl_empty) {
- if (daddr == u->v4daddr) {
+ if (inet_peer_addr_equal(daddr, &u->daddr)) {
/* Before taking a reference, check if this entry was
* deleted, unlink_from_pool() sets refcnt=-1 to make
* distinction between an unused entry (refcnt=0) and
@@ -201,7 +245,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
u = NULL;
return u;
}
- if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+ if (key < inet_peer_key(&u->daddr))
u = rcu_dereference_bh(u->avl_left);
else
u = rcu_dereference_bh(u->avl_right);
@@ -353,7 +397,7 @@ static void unlink_from_pool(struct inet_peer *p)
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
struct inet_peer __rcu **stack[PEER_MAXDEPTH];
struct inet_peer __rcu ***stackptr, ***delp;
- if (lookup(p->v4daddr, stack) != p)
+ if (lookup(&p->daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
if (p->avl_left == peer_avl_empty_rcu) {
@@ -366,7 +410,7 @@ static void unlink_from_pool(struct inet_peer *p)
BUG_ON(rcu_dereference_protected(*stackptr[-1],
lockdep_is_held(&peers.lock)) != t);
**--stackptr = t->avl_left;
- /* t is removed, t->v4daddr > x->v4daddr for any
+ /* t is removed, t->daddr > x->daddr for any
* x in p->avl_left subtree.
* Put t in the old place of p. */
RCU_INIT_POINTER(*delp[0], t);
@@ -433,7 +477,7 @@ static int cleanup_once(unsigned long ttl)
}
/* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(__be32 daddr, int create)
+struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
{
struct inet_peer *p;
struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
@@ -467,10 +511,12 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
}
p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
if (p) {
- p->v4daddr = daddr;
+ p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+ atomic_set(&p->ip_id_count,
+ (daddr->family == AF_INET) ?
+ secure_ip_id(daddr->a4) : 0);
p->tcp_ts_stamp = 0;
INIT_LIST_HEAD(&p->unused);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1684408..e6215bd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
qp->daddr = arg->iph->daddr;
qp->user = arg->user;
qp->peer = sysctl_ipfrag_max_dist ?
- inet_getpeer(arg->iph->saddr, 1) : NULL;
+ inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
}
static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ec2333f..3843c2d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1289,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create)
{
struct inet_peer *peer;
- peer = inet_getpeer(rt->rt_dst, create);
+ peer = inet_getpeer_v4(rt->rt_dst, create);
if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
inet_putpeer(peer);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 69ccbc1..00285fc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1347,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_death_row.sysctl_tw_recycle &&
(dst = inet_csk_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL &&
- peer->v4daddr == saddr) {
+ peer->daddr.a4 == saddr) {
inet_peer_refcheck(peer);
if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
@@ -1778,7 +1778,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
int release_it = 0;
if (!rt || rt->rt_dst != inet->inet_daddr) {
- peer = inet_getpeer(inet->inet_daddr, 1);
+ peer = inet_getpeer_v4(inet->inet_daddr, 1);
release_it = 1;
} else {
if (!rt->peer)
@@ -1804,7 +1804,7 @@ EXPORT_SYMBOL(tcp_v4_remember_stamp);
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
+ struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1);
if (peer) {
const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
--
1.7.3.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-29 21:44 [PATCH 1/3] inetpeer: Support ipv6 addresses David Miller
@ 2010-11-29 22:33 ` Stephen Hemminger
2010-11-30 3:18 ` David Miller
2010-11-30 2:11 ` Brian Haley
2010-11-30 2:33 ` Changli Gao
2 siblings, 1 reply; 13+ messages in thread
From: Stephen Hemminger @ 2010-11-29 22:33 UTC (permalink / raw)
To: David Miller; +Cc: netdev
On Mon, 29 Nov 2010 13:44:03 -0800 (PST)
David Miller <davem@davemloft.net> wrote:
> +static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
Let the compiler decide about inline.
Should be const inet_peer_address_t *
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-29 22:33 ` Stephen Hemminger
@ 2010-11-30 3:18 ` David Miller
0 siblings, 0 replies; 13+ messages in thread
From: David Miller @ 2010-11-30 3:18 UTC (permalink / raw)
To: shemminger; +Cc: netdev
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 29 Nov 2010 14:33:57 -0800
> On Mon, 29 Nov 2010 13:44:03 -0800 (PST)
> David Miller <davem@davemloft.net> wrote:
>
>> +static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
>
> Let the compiler decide about inline.
> Should be const inet_peer_address_t *
Yep, I made those changes thanks.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-29 21:44 [PATCH 1/3] inetpeer: Support ipv6 addresses David Miller
2010-11-29 22:33 ` Stephen Hemminger
@ 2010-11-30 2:11 ` Brian Haley
2010-11-30 3:18 ` David Miller
2010-11-30 2:33 ` Changli Gao
2 siblings, 1 reply; 13+ messages in thread
From: Brian Haley @ 2010-11-30 2:11 UTC (permalink / raw)
To: David Miller; +Cc: netdev
On 11/29/2010 04:44 PM, David Miller wrote:
> +static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
> +{
> + if (a->family == b->family) {
> + switch (a->family) {
> + case AF_INET:
> + if (a->a4 == b->a4)
> + return true;
> + break;
> + case AF_INET6:
> + if (!ipv6_addr_cmp((struct in6_addr *)a,
> + (struct in6_addr *)b))
> + return true;
> + break;
ipv6_addr_equal() ?
-Brian
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-29 21:44 [PATCH 1/3] inetpeer: Support ipv6 addresses David Miller
2010-11-29 22:33 ` Stephen Hemminger
2010-11-30 2:11 ` Brian Haley
@ 2010-11-30 2:33 ` Changli Gao
2010-11-30 3:14 ` David Miller
2 siblings, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-11-30 2:33 UTC (permalink / raw)
To: David Miller; +Cc: netdev
On Tue, Nov 30, 2010 at 5:44 AM, David Miller <davem@davemloft.net> wrote:
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
> include/net/inetpeer.h | 21 ++++++++++++++-
> net/ipv4/inetpeer.c | 66 ++++++++++++++++++++++++++++++++++++++++-------
> net/ipv4/ip_fragment.c | 2 +-
> net/ipv4/route.c | 2 +-
> net/ipv4/tcp_ipv4.c | 6 ++--
> 5 files changed, 80 insertions(+), 17 deletions(-)
>
> diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
> index fe239bf..834f045 100644
> --- a/include/net/inetpeer.h
> +++ b/include/net/inetpeer.h
> @@ -13,10 +13,18 @@
> #include <linux/spinlock.h>
> #include <asm/atomic.h>
>
> +typedef struct {
> + union {
> + __be32 a4;
> + __be32 a6[4];
> + };
> + __u16 family;
> +} inet_peer_address_t;
> +
This makes inet_peer large even though ipv6 isn't enabled.
> struct inet_peer {
> /* group together avl_left,avl_right,v4daddr to speedup lookups */
> struct inet_peer __rcu *avl_left, *avl_right;
> - __be32 v4daddr; /* peer's address */
> + inet_peer_address_t daddr;
I have thought about converting this AVL tree to rbtree. When I saw
the comment above, I gave it up, because rbtree makes this structure
bigger. If ipv6 support is added, I think it is time to turn to
rbtree. :)
> __u32 avl_height;
> struct list_head unused;
> __u32 dtime; /* the time of last use of not
> @@ -42,7 +50,16 @@ struct inet_peer {
> void inet_initpeers(void) __init;
>
> /* can be called with or without local BH being disabled */
> -struct inet_peer *inet_getpeer(__be32 daddr, int create);
> +struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create);
> +
> +static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
> +{
> + inet_peer_address_t daddr;
> +
> + daddr.a4 = v4daddr;
> + daddr.family = AF_INET;
> + return inet_getpeer(&daddr, create);
> +}
>
> /* can be called from BH context or outside */
> extern void inet_putpeer(struct inet_peer *p);
> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
> index 9e94d7c..39f18c5 100644
> --- a/net/ipv4/inetpeer.c
> +++ b/net/ipv4/inetpeer.c
> @@ -19,6 +19,7 @@
> #include <linux/net.h>
> #include <net/ip.h>
> #include <net/inetpeer.h>
> +#include <net/ipv6.h>
>
> /*
> * Theory of operations.
> @@ -152,11 +153,53 @@ static void unlink_from_unused(struct inet_peer *p)
> }
> }
>
> +static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
> +{
> + if (a->family == b->family) {
> + switch (a->family) {
> + case AF_INET:
> + if (a->a4 == b->a4)
> + return true;
> + break;
> + case AF_INET6:
> + if (!ipv6_addr_cmp((struct in6_addr *)a,
> + (struct in6_addr *)b))
> + return true;
> + break;
> + default:
> + break;
> + }
> + }
> + return false;
> +}
> +
> +static inline u32 inet_peer_key(inet_peer_address_t *a)
> +{
> + u32 key;
> +
> + switch (a->family) {
> + case AF_INET:
> + key = (__force __u32) a->a4;
> + break;
> + case AF_INET6:
> + key = ((__force __u32)a->a6[0] ^
> + (__force __u32)a->a6[1] ^
> + (__force __u32)a->a6[2] ^
> + (__force __u32)a->a6[3]);
> + break;
> + default:
> + key = 0;
> + break;
> + }
> + return key;
> +}
> +
> /*
> * Called with local BH disabled and the pool lock held.
> */
> #define lookup(_daddr, _stack) \
> ({ \
> + u32 key = inet_peer_key(_daddr); \
> struct inet_peer *u; \
> struct inet_peer __rcu **v; \
> \
> @@ -165,9 +208,9 @@ static void unlink_from_unused(struct inet_peer *p)
> for (u = rcu_dereference_protected(peers.root, \
> lockdep_is_held(&peers.lock)); \
> u != peer_avl_empty; ) { \
> - if (_daddr == u->v4daddr) \
> + if (inet_peer_addr_equal(_daddr, &u->daddr)) \
> break; \
> - if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
> + if (key < inet_peer_key(&u->daddr)) \
> v = &u->avl_left; \
> else \
> v = &u->avl_right; \
> @@ -185,13 +228,14 @@ static void unlink_from_unused(struct inet_peer *p)
> * But every pointer we follow is guaranteed to be valid thanks to RCU.
> * We exit from this function if number of links exceeds PEER_MAXDEPTH
> */
> -static struct inet_peer *lookup_rcu_bh(__be32 daddr)
> +static struct inet_peer *lookup_rcu_bh(inet_peer_address_t *daddr)
> {
> struct inet_peer *u = rcu_dereference_bh(peers.root);
> + u32 key = inet_peer_key(daddr);
> int count = 0;
>
> while (u != peer_avl_empty) {
> - if (daddr == u->v4daddr) {
> + if (inet_peer_addr_equal(daddr, &u->daddr)) {
> /* Before taking a reference, check if this entry was
> * deleted, unlink_from_pool() sets refcnt=-1 to make
> * distinction between an unused entry (refcnt=0) and
> @@ -201,7 +245,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
> u = NULL;
> return u;
> }
> - if ((__force __u32)daddr < (__force __u32)u->v4daddr)
> + if (key < inet_peer_key(&u->daddr))
> u = rcu_dereference_bh(u->avl_left);
> else
> u = rcu_dereference_bh(u->avl_right);
> @@ -353,7 +397,7 @@ static void unlink_from_pool(struct inet_peer *p)
> if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
> struct inet_peer __rcu **stack[PEER_MAXDEPTH];
> struct inet_peer __rcu ***stackptr, ***delp;
> - if (lookup(p->v4daddr, stack) != p)
> + if (lookup(&p->daddr, stack) != p)
> BUG();
> delp = stackptr - 1; /* *delp[0] == p */
> if (p->avl_left == peer_avl_empty_rcu) {
> @@ -366,7 +410,7 @@ static void unlink_from_pool(struct inet_peer *p)
> BUG_ON(rcu_dereference_protected(*stackptr[-1],
> lockdep_is_held(&peers.lock)) != t);
> **--stackptr = t->avl_left;
> - /* t is removed, t->v4daddr > x->v4daddr for any
> + /* t is removed, t->daddr > x->daddr for any
> * x in p->avl_left subtree.
> * Put t in the old place of p. */
> RCU_INIT_POINTER(*delp[0], t);
> @@ -433,7 +477,7 @@ static int cleanup_once(unsigned long ttl)
> }
>
> /* Called with or without local BH being disabled. */
> -struct inet_peer *inet_getpeer(__be32 daddr, int create)
> +struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
> {
> struct inet_peer *p;
> struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
> @@ -467,10 +511,12 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
> }
> p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
> if (p) {
> - p->v4daddr = daddr;
> + p->daddr = *daddr;
> atomic_set(&p->refcnt, 1);
> atomic_set(&p->rid, 0);
> - atomic_set(&p->ip_id_count, secure_ip_id(daddr));
> + atomic_set(&p->ip_id_count,
> + (daddr->family == AF_INET) ?
> + secure_ip_id(daddr->a4) : 0);
> p->tcp_ts_stamp = 0;
> INIT_LIST_HEAD(&p->unused);
>
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index 1684408..e6215bd 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
> qp->daddr = arg->iph->daddr;
> qp->user = arg->user;
> qp->peer = sysctl_ipfrag_max_dist ?
> - inet_getpeer(arg->iph->saddr, 1) : NULL;
> + inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
> }
>
> static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index ec2333f..3843c2d 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1289,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create)
> {
> struct inet_peer *peer;
>
> - peer = inet_getpeer(rt->rt_dst, create);
> + peer = inet_getpeer_v4(rt->rt_dst, create);
>
> if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
> inet_putpeer(peer);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 69ccbc1..00285fc 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1347,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
> tcp_death_row.sysctl_tw_recycle &&
> (dst = inet_csk_route_req(sk, req)) != NULL &&
> (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
> - peer->v4daddr == saddr) {
> + peer->daddr.a4 == saddr) {
> inet_peer_refcheck(peer);
> if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
> (s32)(peer->tcp_ts - req->ts_recent) >
> @@ -1778,7 +1778,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
> int release_it = 0;
>
> if (!rt || rt->rt_dst != inet->inet_daddr) {
> - peer = inet_getpeer(inet->inet_daddr, 1);
> + peer = inet_getpeer_v4(inet->inet_daddr, 1);
> release_it = 1;
> } else {
> if (!rt->peer)
> @@ -1804,7 +1804,7 @@ EXPORT_SYMBOL(tcp_v4_remember_stamp);
>
> int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
> {
> - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
> + struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1);
>
> if (peer) {
> const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
> --
> 1.7.3.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 2:33 ` Changli Gao
@ 2010-11-30 3:14 ` David Miller
2010-11-30 4:51 ` Changli Gao
0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-11-30 3:14 UTC (permalink / raw)
To: xiaosuo; +Cc: netdev
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 30 Nov 2010 10:33:49 +0800
> I have thought about converting this AVL tree to rbtree. When I saw
> the comment above, I gave it up, because rbtree makes this structure
> bigger. If ipv6 support is added, I think it is time to turn to
> rbtree. :)
If it takes size over 128 bytes, it is probably still a bad idea.
Right now it is just under 128.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 3:14 ` David Miller
@ 2010-11-30 4:51 ` Changli Gao
2010-11-30 5:22 ` David Miller
0 siblings, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-11-30 4:51 UTC (permalink / raw)
To: David Miller; +Cc: netdev
On Tue, Nov 30, 2010 at 11:14 AM, David Miller <davem@davemloft.net> wrote:
> From: Changli Gao <xiaosuo@gmail.com>
> Date: Tue, 30 Nov 2010 10:33:49 +0800
>
>> I have thought about converting this AVL tree to rbtree. When I saw
>> the comment above, I gave it up, because rbtree makes this structure
>> bigger. If ipv6 support is added, I think it is time to turn to
>> rbtree. :)
>
> If it takes size over 128 bytes, it is probably still a bad idea.
> Right now it is just under 128.
>
Why 128? The current size of inet_peer is 64 bytes, and after your
ipv6 patch, it is just 80. And, inet_peer is allocated from its own
mem_cache, so the size of the memory for it should not be aligned to
2^n.
If rbtree is used instead, extra 8 bytes will be needed, and the size
of inet_peer will be 88.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 4:51 ` Changli Gao
@ 2010-11-30 5:22 ` David Miller
2010-11-30 5:42 ` Eric Dumazet
0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-11-30 5:22 UTC (permalink / raw)
To: xiaosuo; +Cc: netdev
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 30 Nov 2010 12:51:08 +0800
> On Tue, Nov 30, 2010 at 11:14 AM, David Miller <davem@davemloft.net> wrote:
>> From: Changli Gao <xiaosuo@gmail.com>
>> Date: Tue, 30 Nov 2010 10:33:49 +0800
>>
>>> I have thought about converting this AVL tree to rbtree. When I saw
>>> the comment above, I gave it up, because rbtree makes this structure
>>> bigger. If ipv6 support is added, I think it is time to turn to
>>> rbtree. :)
>>
>> If it takes size over 128 bytes, it is probably still a bad idea.
>> Right now it is just under 128.
>>
>
> Why 128? The current size of inet_peer is 64 bytes, and after your
> ipv6 patch, it is just 80. And, inet_peer is allocated from its own
> mem_cache, so the size of the memory for it should not be aligned to
> 2^n.
Sorry, I was taking into the consideration other work I am doing
which will move all of the routing metrics into inet_peer as well.
With ipv6 address support, it fits perfectly into 128 bytes on
64-bit.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 5:22 ` David Miller
@ 2010-11-30 5:42 ` Eric Dumazet
2010-11-30 5:53 ` David Miller
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-11-30 5:42 UTC (permalink / raw)
To: David Miller; +Cc: xiaosuo, netdev
Le lundi 29 novembre 2010 à 21:22 -0800, David Miller a écrit :
> From: Changli Gao <xiaosuo@gmail.com>
> Date: Tue, 30 Nov 2010 12:51:08 +0800
>
> > On Tue, Nov 30, 2010 at 11:14 AM, David Miller <davem@davemloft.net> wrote:
> >> From: Changli Gao <xiaosuo@gmail.com>
> >> Date: Tue, 30 Nov 2010 10:33:49 +0800
> >>
> >>> I have thought about converting this AVL tree to rbtree. When I saw
> >>> the comment above, I gave it up, because rbtree makes this structure
> >>> bigger. If ipv6 support is added, I think it is time to turn to
> >>> rbtree. :)
> >>
> >> If it takes size over 128 bytes, it is probably still a bad idea.
> >> Right now it is just under 128.
> >>
> >
> > Why 128? The current size of inet_peer is 64 bytes, and after your
> > ipv6 patch, it is just 80. And, inet_peer is allocated from its own
> > mem_cache, so the size of the memory for it should not be aligned to
> > 2^n.
>
> Sorry, I was taking into the consideration other work I am doing
> which will move all of the routing metrics into inet_peer as well.
>
> With ipv6 address support, it fits perfectly into 128 bytes on
> 64-bit.
> --
Its a bit early in the morning here, I must confess I dont yet
understand your patch David :)
As we use a tree, why not using two different trees for ipv4 / ipv6 ?
I dont understand how computing a 32bit key (sort of hash key) is going
to help when hash collision happens, with an avl tree.
For Changli : AVL tree was OK for RCU conversion, I am not sure about
RBtree yet.
Either version of tree (AVL/rbtree) will be expensive to use if depth is
big (With 2 millions entries, depth is going to be very big). I
understand you want to get rid of route cache ?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 5:42 ` Eric Dumazet
@ 2010-11-30 5:53 ` David Miller
2010-11-30 6:11 ` Eric Dumazet
0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-11-30 5:53 UTC (permalink / raw)
To: eric.dumazet; +Cc: xiaosuo, netdev
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 30 Nov 2010 06:42:16 +0100
> Its a bit early in the morning here, I must confess I dont yet
> understand your patch David :)
>
> As we use a tree, why not using two different trees for ipv4 / ipv6 ?
The "key" just creates a natural ordering in the tree, it's
almost arbitrary except that it must distribute well amongst
the entries.
I currently don't see any reason to make two trees right now.
> I dont understand how computing a 32bit key (sort of hash key) is going
> to help when hash collision happens, with an avl tree.
> Either version of tree (AVL/rbtree) will be expensive to use if depth is
> big (With 2 millions entries, depth is going to be very big). I
> understand you want to get rid of route cache ?
Do we plan to talk to 2 million unique destinations and have active
non-default metrics for each one of them very often?
inet_peer entries will only get created when we need to make
non-default metric settings for a specific destination address.
See that's the thing, it's scope is so much smaller than the existing
routing cache. It's only going to be used in limited if not
controlled cases.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 5:53 ` David Miller
@ 2010-11-30 6:11 ` Eric Dumazet
2010-11-30 6:31 ` David Miller
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-11-30 6:11 UTC (permalink / raw)
To: David Miller; +Cc: xiaosuo, netdev
Le lundi 29 novembre 2010 à 21:53 -0800, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Tue, 30 Nov 2010 06:42:16 +0100
>
> > Its a bit early in the morning here, I must confess I dont yet
> > understand your patch David :)
> >
> > As we use a tree, why not using two different trees for ipv4 / ipv6 ?
>
> The "key" just creates a natural ordering in the tree, it's
> almost arbitrary except that it must distribute well amongst
> the entries.
Hmm. AVL search must take a decision, take the left or the right path.
if current key is equal, which path do you take ?
@@ -165,9 +208,9 @@ static void unlink_from_unused(struct inet_peer *p)
> for (u = rcu_dereference_protected(peers.root, \
> lockdep_is_held(&peers.lock)); \
> u != peer_avl_empty; ) { \
> - if (_daddr == u->v4daddr) \
> + if (inet_peer_addr_equal(_daddr, &u->daddr)) \
> break; \
> - if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
> + if (key < inet_peer_key(&u->daddr)) \
> v = &u->avl_left; \
> else \
> v = &u->avl_right; \
Apparently you take the right one, you may miss the target if its on the
left path ?
>
> I currently don't see any reason to make two trees right now.
>
Cost of a tree is one pointer, and ipv4 search would be faster if we use
different search functions.
> > I dont understand how computing a 32bit key (sort of hash key) is going
> > to help when hash collision happens, with an avl tree.
> > Either version of tree (AVL/rbtree) will be expensive to use if depth is
> > big (With 2 millions entries, depth is going to be very big). I
> > understand you want to get rid of route cache ?
>
> Do we plan to talk to 2 million unique destinations and have active
> non-default metrics for each one of them very often?
>
> inet_peer entries will only get created when we need to make
> non-default metric settings for a specific destination address.
>
> See that's the thing, it's scope is so much smaller than the existing
> routing cache. It's only going to be used in limited if not
> controlled cases.
>
OK good :)
I have no idea how many addresses have non default metric settings.
Do you know how to make an estimation on a server ?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] inetpeer: Support ipv6 addresses.
2010-11-30 6:11 ` Eric Dumazet
@ 2010-11-30 6:31 ` David Miller
0 siblings, 0 replies; 13+ messages in thread
From: David Miller @ 2010-11-30 6:31 UTC (permalink / raw)
To: eric.dumazet; +Cc: xiaosuo, netdev
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 30 Nov 2010 07:11:19 +0100
> Le lundi 29 novembre 2010 à 21:53 -0800, David Miller a écrit :
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Tue, 30 Nov 2010 06:42:16 +0100
>>
>> > Its a bit early in the morning here, I must confess I dont yet
>> > understand your patch David :)
>> >
>> > As we use a tree, why not using two different trees for ipv4 / ipv6 ?
>>
>> The "key" just creates a natural ordering in the tree, it's
>> almost arbitrary except that it must distribute well amongst
>> the entries.
>
> Hmm. AVL search must take a decision, take the left or the right path.
>
> if current key is equal, which path do you take ?
Right. :-) But yes there is some error that needs to
be handled in that equal keys can lead to improper paths
as you showed.
I guess I do need to make a separate tree, how disappointing.
Anyways I will work on fixing this.
> Do you know how to make an estimation on a server ?
No way to do it straightforward I guess because right now TCP metrics
key on all sorts of ridiculous things like source address, TOS, and
other crap as a consequence of how the routing cache works.
So much replicated information and wasted memory.
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2010-11-30 6:30 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-29 21:44 [PATCH 1/3] inetpeer: Support ipv6 addresses David Miller
2010-11-29 22:33 ` Stephen Hemminger
2010-11-30 3:18 ` David Miller
2010-11-30 2:11 ` Brian Haley
2010-11-30 3:18 ` David Miller
2010-11-30 2:33 ` Changli Gao
2010-11-30 3:14 ` David Miller
2010-11-30 4:51 ` Changli Gao
2010-11-30 5:22 ` David Miller
2010-11-30 5:42 ` Eric Dumazet
2010-11-30 5:53 ` David Miller
2010-11-30 6:11 ` Eric Dumazet
2010-11-30 6:31 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).