netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] rtcache remove respin
@ 2012-07-01 12:02 David Miller
  2012-07-01 12:15 ` David Miller
  2012-07-02 10:44 ` Eric Dumazet
  0 siblings, 2 replies; 8+ messages in thread
From: David Miller @ 2012-07-01 12:02 UTC (permalink / raw)
  To: netdev


It's been a while and there were of course a lot of merge hassles with
the most recent set I posted, so I respun these patches tonight
because I wanted to see the effects of the recent rpfilter hacks on an
rtcache-less system.

On a SPARC T3-1:

1) Output route lookup: ~2800 cycles
2) Input route lookups: ~3000 cycles (rpfilter=0)
                        ~4300 cycles (rpfilter=1)

Another nice part is how small struct rtable is after this patch set:

struct rtable {
	struct dst_entry        dst;
	int                     rt_genid;
	unsigned int            rt_flags;
	__u16                   rt_type;
	__be32                  rt_dst;
	int                     rt_route_iif;
	int                     rt_iif;
	int                     rt_oif;
	__be32                  rt_gateway;
	u32                     rt_peer_genid;
	unsigned long           _peer;
	struct fib_info         *fi;
};

which is about 208 bytes on sparc64.

Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-01 12:02 [PATCH 0/5] rtcache remove respin David Miller
@ 2012-07-01 12:15 ` David Miller
  2012-07-03 10:56   ` David Miller
  2012-07-02 10:44 ` Eric Dumazet
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2012-07-01 12:15 UTC (permalink / raw)
  To: netdev

From: David Miller <davem@davemloft.net>
Date: Sun, 01 Jul 2012 05:02:43 -0700 (PDT)

> On a SPARC T3-1:
> 
> 1) Output route lookup: ~2800 cycles
> 2) Input route lookups: ~3000 cycles (rpfilter=0)
>                         ~4300 cycles (rpfilter=1)

Out of curiosity I got rid of the local table and made all routes
go into the main table and those numbers above become:

1) Output route lookup: ~2500 cycles
2) Input route lookups: ~2800 cycles (rpfilter=0)
                        ~4100 cycles (rpfilter=1)

====================
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3dc7c96..5a103a9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -186,9 +186,7 @@ static inline struct fib_table *fib_get_table(struct net *net, u32 id)
 {
 	struct hlist_head *ptr;
 
-	ptr = id == RT_TABLE_LOCAL ?
-		&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] :
-		&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
+	ptr = &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
 	return hlist_entry(ptr->first, struct fib_table, tb_hlist);
 }
 
@@ -202,10 +200,6 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 {
 	struct fib_table *table;
 
-	table = fib_get_table(net, RT_TABLE_LOCAL);
-	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
-		return 0;
-
 	table = fib_get_table(net, RT_TABLE_MAIN);
 	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
 		return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e..bdc0231 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -160,7 +160,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 		/* Fallback to FIB local table so that communication
 		 * over loopback subnets work.
 		 */
-		local = fib_get_table(net, RT_TABLE_LOCAL);
+		local = fib_get_table(net, RT_TABLE_MAIN);
 		if (local &&
 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 		    res.type == RTN_LOCAL)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a658fb4..0056542 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -52,16 +52,10 @@ static int __net_init fib4_rules_init(struct net *net)
 {
 	struct fib_table *local_table, *main_table;
 
-	local_table = fib_trie_table(RT_TABLE_LOCAL);
-	if (local_table == NULL)
-		return -ENOMEM;
-
 	main_table  = fib_trie_table(RT_TABLE_MAIN);
 	if (main_table == NULL)
 		goto fail;
 
-	hlist_add_head_rcu(&local_table->tb_hlist,
-				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
 	hlist_add_head_rcu(&main_table->tb_hlist,
 				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
 	return 0;
@@ -155,7 +149,7 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(net, RT_TABLE_LOCAL);
+	local_table = fib_get_table(net, RT_TABLE_MAIN);
 	if (local_table) {
 		ret = RTN_UNICAST;
 		rcu_read_lock();
@@ -702,11 +696,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		},
 	};
 
-	if (type == RTN_UNICAST)
-		tb = fib_new_table(net, RT_TABLE_MAIN);
-	else
-		tb = fib_new_table(net, RT_TABLE_LOCAL);
-
+	tb = fib_new_table(net, RT_TABLE_MAIN);
 	if (tb == NULL)
 		return;
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b23fd95..41a53b0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -284,9 +284,6 @@ static int fib_default_rules_init(struct fib_rules_ops *ops)
 {
 	int err;
 
-	err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0);
-	if (err < 0)
-		return err;
 	err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
 	if (err < 0)
 		return err;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-01 12:02 [PATCH 0/5] rtcache remove respin David Miller
  2012-07-01 12:15 ` David Miller
@ 2012-07-02 10:44 ` Eric Dumazet
  2012-07-02 10:59   ` David Miller
  2012-07-05 10:15   ` David Miller
  1 sibling, 2 replies; 8+ messages in thread
From: Eric Dumazet @ 2012-07-02 10:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, 2012-07-01 at 05:02 -0700, David Miller wrote:
> It's been a while and there were of course a lot of merge hassles with
> the most recent set I posted, so I respun these patches tonight
> because I wanted to see the effects of the recent rpfilter hacks on an
> rtcache-less system.
> 
> On a SPARC T3-1:
> 
> 1) Output route lookup: ~2800 cycles
> 2) Input route lookups: ~3000 cycles (rpfilter=0)
>                         ~4300 cycles (rpfilter=1)
> 
> Another nice part is how small struct rtable is after this patch set:
> 
> struct rtable {
> 	struct dst_entry        dst;
> 	int                     rt_genid;
> 	unsigned int            rt_flags;
> 	__u16                   rt_type;
> 	__be32                  rt_dst;
> 	int                     rt_route_iif;
> 	int                     rt_iif;
> 	int                     rt_oif;
> 	__be32                  rt_gateway;
> 	u32                     rt_peer_genid;
> 	unsigned long           _peer;
> 	struct fib_info         *fi;
> };
> 
> which is about 208 bytes on sparc64.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

Can be <= 192 actually

rcu_head not needed anymore in dst_entry

If we still want __refcnt being on cache line boundary, we might find a
better way to accomplish this.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-02 10:44 ` Eric Dumazet
@ 2012-07-02 10:59   ` David Miller
  2012-07-05 10:15   ` David Miller
  1 sibling, 0 replies; 8+ messages in thread
From: David Miller @ 2012-07-02 10:59 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 02 Jul 2012 12:44:01 +0200

> On Sun, 2012-07-01 at 05:02 -0700, David Miller wrote:
>> It's been a while and there were of course a lot of merge hassles with
>> the most recent set I posted, so I respun these patches tonight
>> because I wanted to see the effects of the recent rpfilter hacks on an
>> rtcache-less system.
>> 
>> On a SPARC T3-1:
>> 
>> 1) Output route lookup: ~2800 cycles
>> 2) Input route lookups: ~3000 cycles (rpfilter=0)
>>                         ~4300 cycles (rpfilter=1)
>> 
>> Another nice part is how small struct rtable is after this patch set:
>> 
>> struct rtable {
>> 	struct dst_entry        dst;
>> 	int                     rt_genid;
>> 	unsigned int            rt_flags;
>> 	__u16                   rt_type;
>> 	__be32                  rt_dst;
>> 	int                     rt_route_iif;
>> 	int                     rt_iif;
>> 	int                     rt_oif;
>> 	__be32                  rt_gateway;
>> 	u32                     rt_peer_genid;
>> 	unsigned long           _peer;
>> 	struct fib_info         *fi;
>> };
>> 
>> which is about 208 bytes on sparc64.
>> 
>> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> Can be <= 192 actually
> 
> rcu_head not needed anymore in dst_entry
> 
> If we still want __refcnt being on cache line boundary, we might find a
> better way to accomplish this.

Once we can actually check something like this in, rt_dst is
guarenteed to be eliminated as well.

The dst neighbour pointer will also be gone.

So lots of shrinking still to go and yes we'll need to reposition
that __refcnt member carefully.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-01 12:15 ` David Miller
@ 2012-07-03 10:56   ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2012-07-03 10:56 UTC (permalink / raw)
  To: netdev

From: David Miller <davem@davemloft.net>
Date: Sun, 01 Jul 2012 05:15:56 -0700 (PDT)

> From: David Miller <davem@davemloft.net>
> Date: Sun, 01 Jul 2012 05:02:43 -0700 (PDT)
> 
>> On a SPARC T3-1:
>> 
>> 1) Output route lookup: ~2800 cycles
>> 2) Input route lookups: ~3000 cycles (rpfilter=0)
>>                         ~4300 cycles (rpfilter=1)
> 
> Out of curiosity I got rid of the local table and made all routes
> go into the main table and those numbers above become:
> 
> 1) Output route lookup: ~2500 cycles
> 2) Input route lookups: ~2800 cycles (rpfilter=0)
>                         ~4100 cycles (rpfilter=1)
> 

And with the neighbour patches I posted tonight these numbers are:

1) Output route lookup: ~2150 cycles
2) Input route lookups: ~2600 cycles (rpfilter=0)
                        ~3700 cycles (rpfilter=1)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-02 10:44 ` Eric Dumazet
  2012-07-02 10:59   ` David Miller
@ 2012-07-05 10:15   ` David Miller
  2012-07-05 19:03     ` Eric Dumazet
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2012-07-05 10:15 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 02 Jul 2012 12:44:01 +0200

> If we still want __refcnt being on cache line boundary, we might find a
> better way to accomplish this.

Back to this issue again.

Eric, if you take a look at net-next right now, I left a dummy padding
in dst_entry where the neighbour pointer used to be.

Can you come up with some way to make use of that new space?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-05 10:15   ` David Miller
@ 2012-07-05 19:03     ` Eric Dumazet
  2012-07-05 21:32       ` David Miller
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2012-07-05 19:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Thu, 2012-07-05 at 03:15 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 02 Jul 2012 12:44:01 +0200
> 
> > If we still want __refcnt being on cache line boundary, we might find a
> > better way to accomplish this.
> 
> Back to this issue again.
> 
> Eric, if you take a look at net-next right now, I left a dummy padding
> in dst_entry where the neighbour pointer used to be.
> 
> Can you come up with some way to make use of that new space?
> 

If route cache is removed, I believe we can remove all paddings.

Each tcp session will have its own dst_entry, instead of being shared.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] rtcache remove respin
  2012-07-05 19:03     ` Eric Dumazet
@ 2012-07-05 21:32       ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2012-07-05 21:32 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 05 Jul 2012 21:03:37 +0200

> If route cache is removed, I believe we can remove all paddings.
> 
> Each tcp session will have its own dst_entry, instead of being shared.

Not really, the routing cache removal patches have poor performance
and won't go-in as-is. :-) Once PMTU/redirect/TCP-metrics are reworked
I plan to do things like the patch below to make the performance loss
more acceptable.

And then I'll do the same for input routes too, at which point your
'noref' case can be put back.

So really, we have to consider how to rework the layout of this
structure.

Thanks.

====================
ipv4: Cache output routes in fib_info nexthops.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |    3 +++
 net/ipv4/fib_semantics.c |    2 ++
 net/ipv4/route.c         |    9 +++++++++
 3 files changed, 14 insertions(+)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3dc7c96..ff9f0c4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -45,6 +45,7 @@ struct fib_config {
  };
 
 struct fib_info;
+struct rtable;
 
 struct fib_nh {
 	struct net_device	*nh_dev;
@@ -63,6 +64,8 @@ struct fib_nh {
 	__be32			nh_gw;
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
+
+	struct rtable		*rth;
 };
 
 /*
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c46c20b..f3ada74 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -148,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
 	change_nexthops(fi) {
 		if (nexthop_nh->nh_dev)
 			dev_put(nexthop_nh->nh_dev);
+		if (nexthop_nh->rth)
+			dst_release(&nexthop_nh->rth->dst);
 	} endfor_nexthops(fi);
 
 	release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9f68f74..35bfd98 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -914,6 +914,8 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
+		FIB_RES_NH(*res).rth = rt;
+		dst_clone(&rt->dst);
 	}
 
 	if (dst_mtu(dst) > IP_MAX_MTU)
@@ -1399,6 +1401,13 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 			fi = NULL;
 	}
 
+	if (fi) {
+		rth = FIB_RES_NH(*res).rth;
+		if (rth) {
+			dst_use(&rth->dst, jiffies);
+			return rth;
+		}
+	}
 	rth = rt_dst_alloc(dev_out,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
 			   IN_DEV_CONF_GET(in_dev, NOXFRM));
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-07-05 21:32 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-07-01 12:02 [PATCH 0/5] rtcache remove respin David Miller
2012-07-01 12:15 ` David Miller
2012-07-03 10:56   ` David Miller
2012-07-02 10:44 ` Eric Dumazet
2012-07-02 10:59   ` David Miller
2012-07-05 10:15   ` David Miller
2012-07-05 19:03     ` Eric Dumazet
2012-07-05 21:32       ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).