netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ipv4: Cache source address in nexthop entries.
@ 2011-03-08  4:58 David Miller
  2011-03-08  9:57 ` Julian Anastasov
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2011-03-08  4:58 UTC (permalink / raw)
  To: netdev


When doing output route lookups, we have to select the source address
if the user has not specified an explicit one.

First, if the route has an explicit preferred source address
specified, then we use that.

Otherwise we search the route's outgoing interface for a suitable
address.

This search can be precomputed and cached at route insertion time.

The only missing part is that we have to refresh this precomputed
value any time addresses are added or removed from the interface, and
this is accomplished by fib_update_nh_saddrs().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |    7 +++++--
 net/ipv4/fib_frontend.c  |    2 ++
 net/ipv4/fib_semantics.c |   31 ++++++++++++++++++++++++-------
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 523a170..0e14083 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -60,6 +60,7 @@ struct fib_nh {
 #endif
 	int			nh_oif;
 	__be32			nh_gw;
+	__be32			nh_saddr;
 };
 
 /*
@@ -139,11 +140,13 @@ struct fib_result_nl {
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
-#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : __fib_res_prefsrc(&res))
+#define FIB_RES_SADDR(res)		(FIB_RES_NH(res).nh_saddr)
 #define FIB_RES_GW(res)			(FIB_RES_NH(res).nh_gw)
 #define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
 #define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
 
+#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : FIB_RES_SADDR(res))
+
 struct fib_table {
 	struct hlist_node tb_hlist;
 	u32		tb_id;
@@ -224,8 +227,8 @@ extern void fib_select_default(struct fib_result *res);
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
 extern int fib_sync_down_dev(struct net_device *dev, int force);
 extern int fib_sync_down_addr(struct net *net, __be32 local);
+extern void fib_update_nh_saddrs(struct net_device *dev);
 extern int fib_sync_up(struct net_device *dev);
-extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
 
 /* Exported by fib_trie.c */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ad0778a..1d2233c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -890,10 +890,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
+		fib_update_nh_saddrs(dev);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
+		fib_update_nh_saddrs(dev);
 		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
 			 * Disable IP.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6349a21..952c737 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -853,6 +853,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 				goto err_inval;
 	}
 
+	change_nexthops(fi) {
+		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
+							nexthop_nh->nh_gw,
+							nexthop_nh->nh_scope);
+	} endfor_nexthops(fi)
+
 link_it:
 	ofi = fib_find_info(fi);
 	if (ofi) {
@@ -898,13 +904,6 @@ failure:
 	return ERR_PTR(err);
 }
 
-/* Find appropriate source address to this destination */
-
-__be32 __fib_res_prefsrc(struct fib_result *res)
-{
-	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
-}
-
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
@@ -1128,6 +1127,24 @@ out:
 	return;
 }
 
+void fib_update_nh_saddrs(struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct fib_nh *nh;
+	unsigned int hash;
+
+	hash = fib_devindex_hashfn(dev->ifindex);
+	head = &fib_info_devhash[hash];
+	hlist_for_each_entry(nh, node, head, nh_hash) {
+		if (nh->nh_dev != dev)
+			continue;
+		nh->nh_saddr = inet_select_addr(nh->nh_dev,
+						nh->nh_gw,
+						nh->nh_scope);
+	}
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 /*
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-08  4:58 [PATCH] ipv4: Cache source address in nexthop entries David Miller
@ 2011-03-08  9:57 ` Julian Anastasov
  2011-03-08 18:38   ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Anastasov @ 2011-03-08  9:57 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


 	Hello,

On Mon, 7 Mar 2011, David Miller wrote:

> When doing output route lookups, we have to select the source address
> if the user has not specified an explicit one.
>
> First, if the route has an explicit preferred source address
> specified, then we use that.
>
> Otherwise we search the route's outgoing interface for a suitable
> address.
>
> This search can be precomputed and cached at route insertion time.
>
> The only missing part is that we have to refresh this precomputed
> value any time addresses are added or removed from the interface, and
> this is accomplished by fib_update_nh_saddrs().
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
> include/net/ip_fib.h     |    7 +++++--
> net/ipv4/fib_frontend.c  |    2 ++
> net/ipv4/fib_semantics.c |   31 ++++++++++++++++++++++++-------
> 3 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 523a170..0e14083 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -60,6 +60,7 @@ struct fib_nh {
> #endif
> 	int			nh_oif;
> 	__be32			nh_gw;
> +	__be32			nh_saddr;
> };
>
> /*
> @@ -139,11 +140,13 @@ struct fib_result_nl {
>
> #endif /* CONFIG_IP_ROUTE_MULTIPATH */
>
> -#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : __fib_res_prefsrc(&res))
> +#define FIB_RES_SADDR(res)		(FIB_RES_NH(res).nh_saddr)
> #define FIB_RES_GW(res)			(FIB_RES_NH(res).nh_gw)
> #define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
> #define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
>
> +#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : FIB_RES_SADDR(res))
> +
> struct fib_table {
> 	struct hlist_node tb_hlist;
> 	u32		tb_id;
> @@ -224,8 +227,8 @@ extern void fib_select_default(struct fib_result *res);
> extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
> extern int fib_sync_down_dev(struct net_device *dev, int force);
> extern int fib_sync_down_addr(struct net *net, __be32 local);
> +extern void fib_update_nh_saddrs(struct net_device *dev);
> extern int fib_sync_up(struct net_device *dev);
> -extern __be32  __fib_res_prefsrc(struct fib_result *res);
> extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
>
> /* Exported by fib_trie.c */
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index ad0778a..1d2233c 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -890,10 +890,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
> #ifdef CONFIG_IP_ROUTE_MULTIPATH
> 		fib_sync_up(dev);
> #endif
> +		fib_update_nh_saddrs(dev);
> 		rt_cache_flush(dev_net(dev), -1);
> 		break;
> 	case NETDEV_DOWN:
> 		fib_del_ifaddr(ifa);
> +		fib_update_nh_saddrs(dev);
> 		if (ifa->ifa_dev->ifa_list == NULL) {
> 			/* Last address was deleted from this interface.
> 			 * Disable IP.
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 6349a21..952c737 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -853,6 +853,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
> 				goto err_inval;
> 	}
>
> +	change_nexthops(fi) {
> +		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
> +							nexthop_nh->nh_gw,
> +							nexthop_nh->nh_scope);
> +	} endfor_nexthops(fi)
> +
> link_it:
> 	ofi = fib_find_info(fi);
> 	if (ofi) {
> @@ -898,13 +904,6 @@ failure:
> 	return ERR_PTR(err);
> }
>
> -/* Find appropriate source address to this destination */
> -
> -__be32 __fib_res_prefsrc(struct fib_result *res)
> -{
> -	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
> -}
> -
> int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
> 		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
> 		  struct fib_info *fi, unsigned int flags)
> @@ -1128,6 +1127,24 @@ out:
> 	return;
> }
>
> +void fib_update_nh_saddrs(struct net_device *dev)
> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct fib_nh *nh;
> +	unsigned int hash;
> +
> +	hash = fib_devindex_hashfn(dev->ifindex);
> +	head = &fib_info_devhash[hash];
> +	hlist_for_each_entry(nh, node, head, nh_hash) {
> +		if (nh->nh_dev != dev)
> +			continue;
> +		nh->nh_saddr = inet_select_addr(nh->nh_dev,
> +						nh->nh_gw,
> +						nh->nh_scope);

 	The idea to have per-nexthop src is good, mostly for
multipath routes. May be one day it will be possible to
provide prefsrc for nexthop from user space.

 	But I see problems with this patch. nh_scope
indicates if the nexthop is gatewayed (RT_SCOPE_LINK) or
direct (RT_SCOPE_HOST). OTOH, the addresses can be with
different scope: host, link, global (default). Same for
the routes.

 	Before now it was possible to bring device up, to add
scope host address there, to add some link route without prefsrc.
In this case the output route will ignore the scope host address,
it can not be exposed to link or universe, global address
from another device will be selected. inet_select_addr works
in this way: get address with scope <= provided_scope from
provided_device or global address from another device.

 	It means, even if there are addresses on the
concerned device it does not mean the routes on this device
are required to use prefsrc from this device. We must
restrict the scope according to the provided for the
route: cfg->fc_scope. That is how it worked before now:
__fib_res_prefsrc uses res->scope, it was set by
fib_semantic_match() with fa_scope, fa_scope is set
from cfg->fc_scope. Then we will select proper saddr
according to the provided scope for the route: host,
link or global route.

 	Also, may be we must be prepared to call
fib_update_nh_saddrs for last_ip events, just like
fib_sync_down_addr is called. Or we have to implement it
as fib_saddr/fib_src and fib_update_nh_saddrs should be part of
fib_sync_down_addr. fib_saddr can be set from fib_prefsrc or
from inet_select_addr(nh->nh_dev, nh->nh_gw, cfg->fc_scope)
and then we should replace fib_prefsrc hashing with
fib_saddr hashing, so that events can invalidate these
routes.

 	But as we need nh_saddr to replace __fib_res_prefsrc()
I think we should add additional hashing, just like for
fib_prefsrc, we need nh_saddr_hash. fib_update_nh_saddrs
should be called to walk nh_saddr_hash entries, from
the same place where fib_sync_down_addr is called.
As the addresses have nothing to do with the link
state, I don't think it is correct to call fib_update_nh_saddrs
for DEV events.

> +	}
> +}
> +
> #ifdef CONFIG_IP_ROUTE_MULTIPATH
>
> /*
> -- 
> 1.7.4.1

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-08  9:57 ` Julian Anastasov
@ 2011-03-08 18:38   ` David Miller
  2011-03-08 19:03     ` David Miller
  2011-03-09  0:49     ` Julian Anastasov
  0 siblings, 2 replies; 10+ messages in thread
From: David Miller @ 2011-03-08 18:38 UTC (permalink / raw)
  To: ja; +Cc: netdev

From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 8 Mar 2011 11:57:38 +0200 (EET)

> 	It means, even if there are addresses on the
> concerned device it does not mean the routes on this device
> are required to use prefsrc from this device. We must
> restrict the scope according to the provided for the
> route: cfg->fc_scope.

Simple to fix, I'll remember the fc_scope value in the nexthop
and use that in the address selection call.

> As the addresses have nothing to do with the link
> state, I don't think it is correct to call fib_update_nh_saddrs
> for DEV events.

fib_update_nh_saddrs() gets called for "DEV" events since those
are what are emitted when addresses are addded and removed.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-08 18:38   ` David Miller
@ 2011-03-08 19:03     ` David Miller
  2011-03-15 10:00       ` Julian Anastasov
  2011-03-09  0:49     ` Julian Anastasov
  1 sibling, 1 reply; 10+ messages in thread
From: David Miller @ 2011-03-08 19:03 UTC (permalink / raw)
  To: ja; +Cc: netdev

From: David Miller <davem@davemloft.net>
Date: Tue, 08 Mar 2011 10:38:01 -0800 (PST)

> From: Julian Anastasov <ja@ssi.bg>
> Date: Tue, 8 Mar 2011 11:57:38 +0200 (EET)
> 
>> 	It means, even if there are addresses on the
>> concerned device it does not mean the routes on this device
>> are required to use prefsrc from this device. We must
>> restrict the scope according to the provided for the
>> route: cfg->fc_scope.
> 
> Simple to fix, I'll remember the fc_scope value in the nexthop
> and use that in the address selection call.

I'll push the following into net-next-2.6, thanks Julian:

--------------------
ipv4: Fix scope value used in route src-address caching.

We have to use cfg->fc_scope not the final nh_scope value.

Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |    1 +
 net/ipv4/fib_semantics.c |    5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 0e14083..3f6c943 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -51,6 +51,7 @@ struct fib_nh {
 	struct fib_info		*nh_parent;
 	unsigned		nh_flags;
 	unsigned char		nh_scope;
+	unsigned char		nh_cfg_scope;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			nh_weight;
 	int			nh_power;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 952c737..d73d758 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -854,9 +854,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	}
 
 	change_nexthops(fi) {
+		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
 		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
 							nexthop_nh->nh_gw,
-							nexthop_nh->nh_scope);
+							nexthop_nh->nh_cfg_scope);
 	} endfor_nexthops(fi)
 
 link_it:
@@ -1141,7 +1142,7 @@ void fib_update_nh_saddrs(struct net_device *dev)
 			continue;
 		nh->nh_saddr = inet_select_addr(nh->nh_dev,
 						nh->nh_gw,
-						nh->nh_scope);
+						nh->nh_cfg_scope);
 	}
 }
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-08 18:38   ` David Miller
  2011-03-08 19:03     ` David Miller
@ 2011-03-09  0:49     ` Julian Anastasov
  2011-03-09 21:08       ` David Miller
  1 sibling, 1 reply; 10+ messages in thread
From: Julian Anastasov @ 2011-03-09  0:49 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


 	Hello,

On Tue, 8 Mar 2011, David Miller wrote:

>> As the addresses have nothing to do with the link
>> state, I don't think it is correct to call fib_update_nh_saddrs
>> for DEV events.
>
> fib_update_nh_saddrs() gets called for "DEV" events since those
> are what are emitted when addresses are addded and removed.

 	But we still need to propagate the address events
to nexthops on all devices. Even if it is slow, I see it in
this way (not tested):

static inline void update_nh_saddr(const struct in_ifaddr *ifa,
 				   struct net_device *dev,
 				   struct fib_nh *nexthop_nh,
 				   unsigned long event)
{
 	if (event == NETDEV_UP) {
 		if (ifa->ifa_local == nexthop_nh->nh_saddr)
 			return;
 	} else {
 		if (ifa->ifa_local != nexthop_nh->nh_saddr)
 			return;
 	}
 	if (ifa->ifa_scope > nexthop_nh->nh_cfg_scope)
 		return;
 	if (dev != nexthop_nh->nh_dev && ifa->ifa_scope >= RT_SCOPE_LINK)
 		return;
 	if (nexthop_nh->nh_flags & RTNH_F_DEAD)
 		return;
 	nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
 						nexthop_nh->nh_gw,
 						nexthop_nh->nh_cfg_scope);
}

void fib_update_nh_saddrs(const struct in_ifaddr *ifa, unsigned long event)
{
 	struct net_device *dev = ifa->ifa_dev->dev;
 	int i;

 	for (i = 0; i < fib_hash_size; i++) {
 		struct hlist_head *head = &fib_info_hash[i];
 		struct hlist_node *node;
 		struct fib_info *fi;

 		hlist_for_each_entry(fi, node, head, fib_hash) {
 			if (fi->fib_prefsrc)
 				continue;
 			change_nexthops(fi) {
 				if (!nexthop_nh->nh_dev)
 					continue;
 				update_nh_saddr(ifa, dev, nexthop_nh, event);
 			} endfor_nexthops(fi);
 		}
 	}
}

 	Then may be fib_sync_up() should also refresh nh_saddr,
so that all alive nexthops get actual source address which
was not possible while device was down.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-09  0:49     ` Julian Anastasov
@ 2011-03-09 21:08       ` David Miller
  2011-03-09 23:32         ` Julian Anastasov
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2011-03-09 21:08 UTC (permalink / raw)
  To: ja; +Cc: netdev

From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 9 Mar 2011 02:49:49 +0200 (EET)

> 	But we still need to propagate the address events
> to nexthops on all devices. Even if it is slow, I see it in
> this way (not tested):

Ok, I think it is even more involved than what you propose.

We have to potentially re-run inet_select_addr() on all nexthops, even
those pointing to devices other than the one being modified, because
when an interface loses it's last IP address we have to look for a
source address to use on other active interfaces.

Probably it makes sense to recompute this at use time instead of
walking all of this stuff at every event.

So device address operations increment a generation ID and
FIB_RES_PREFSRC() checks that ID against one stored in the nexthop.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-09 21:08       ` David Miller
@ 2011-03-09 23:32         ` Julian Anastasov
  2011-03-09 23:40           ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Anastasov @ 2011-03-09 23:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


 	Hello,

On Wed, 9 Mar 2011, David Miller wrote:

> From: Julian Anastasov <ja@ssi.bg>
> Date: Wed, 9 Mar 2011 02:49:49 +0200 (EET)
>
>> 	But we still need to propagate the address events
>> to nexthops on all devices. Even if it is slow, I see it in
>> this way (not tested):
>
> Ok, I think it is even more involved than what you propose.
>
> We have to potentially re-run inet_select_addr() on all nexthops, even
> those pointing to devices other than the one being modified, because
> when an interface loses it's last IP address we have to look for a
> source address to use on other active interfaces.

 	Yes. The common case is addresses to be scope
global. That means they can influence any nexthop on
any device. Only when address has scope >= LINK we can
use version that walks single table row for the same device.
In the default case we need to walk all table rows.

 	Additionally, it is possible same address to exist
on different devices, possibly with different scope. That is
why reacting only on "last_ip" indication when deleting IP
is not enough. May be while we delete scope-link address on the
concerned device, there can be same address as scope-link
on another device but its scope is not enough to keep
it as nh_saddr, so the right thing is to get different
IP as saddr. That is why in my version of fib_update_nh_saddrs
I don't care about "last_ip" indication. I think, my
version of fib_update_nh_saddrs is ok for IP add/del but
as I said, device UP events should recalc nh_saddr too.
In my version the logic is:

- add IP:
 	- check all NHs on all devices
 	- goal: if the added IP-scope is suitable for NH
 	it can replace other less-priority IPs, where the
 	priority depends on factors such as device, placement in
 	device list and scope
 	- if NH has same nh_saddr then nothing to do
 	- IP scope and DEV should be allowed for the nh_cfg_scope
 	to change the nh_saddr

- del IP:
 	- check all NHs on all devices
 	- goal: if the removed IP-scope was nh_saddr somewhere
 	then we should recalc NH, even when IP is not the
 	last address with same value
 	- if NH has different nh_saddr then nothing to do
 	- IP scope and DEV should be allowed for the nh_cfg_scope
 	to change the nh_saddr because may be this IP-scope-DEV
 	was used as nh_saddr

> Probably it makes sense to recompute this at use time instead of
> walking all of this stuff at every event.

 	Yes, it looks less dangerous.

> So device address operations increment a generation ID and
> FIB_RES_PREFSRC() checks that ID against one stored in the nexthop.

 	Yes, may be it is better this generation ID to be global
value for netns, because the global addresses should cause
refresh for all nexthops on all devices. The ID should be
matched with corresponding value in NH. Another option is
to use per-device gen_id instead of netns gen_id but in this
case scope-global address event should increment gen_id for all
devices while the device_UP event will increment only
gen_id for the concerned DEV.

 	I.e. there are 2 variants:

1. global netns gen_id:
 	- add/del IP, device UP => netns->gen_id ++

2. dev->gen_id:
 	- add/del IP with scope < LINK => increment
 	dev->gen_id for all devices
 	- add/del IP with scope >= LINK => increment
 	dev->gen_id only for concerned device
 	- device UP: increment dev->gen_id only for
 	concerned device

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-09 23:32         ` Julian Anastasov
@ 2011-03-09 23:40           ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2011-03-09 23:40 UTC (permalink / raw)
  To: ja; +Cc: netdev

From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 10 Mar 2011 01:32:18 +0200 (EET)

> On Wed, 9 Mar 2011, David Miller wrote:
> 
>> So device address operations increment a generation ID and
>> FIB_RES_PREFSRC() checks that ID against one stored in the nexthop.
> 
> 	Yes, may be it is better this generation ID to be global
> value for netns, because the global addresses should cause
> refresh for all nexthops on all devices. The ID should be
> matched with corresponding value in NH. Another option is
> to use per-device gen_id instead of netns gen_id but in this
> case scope-global address event should increment gen_id for all
> devices while the device_UP event will increment only
> gen_id for the concerned DEV.
> 
> 	I.e. there are 2 variants:

I think the per-namespace ID makes the most sense.

If the whole idea of the genid is to reduce complication, trying to place
games on a per-device basis wrt. the scope cases seems to work against that
goal :-)

After I try to attack the ipv6 routing regression I added in net-2.6, I'll
take a stab at this.

Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-08 19:03     ` David Miller
@ 2011-03-15 10:00       ` Julian Anastasov
  2011-03-15 23:02         ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Anastasov @ 2011-03-15 10:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


 	Hello,

On Tue, 8 Mar 2011, David Miller wrote:

> 	change_nexthops(fi) {
> +		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
> 		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
> 							nexthop_nh->nh_gw,
> -							nexthop_nh->nh_scope);
> +							nexthop_nh->nh_cfg_scope);
> 	} endfor_nexthops(fi)

 	I see another problem with nh_saddr. fib_info
is reused when NHs and some parameters are equal but the
scope is not part of the fib_info and it is wrong to cache
route scope in NH or even in fib_info. The problem is
that we can expose in nh_saddr addresses with wrong
scope for the route. It happens when two routes with
different route scope reuse same fib_info.

 	First thought is that the field fa_scope can be
moved into fib_scope in struct fib_info, not to use
nh_cfg_scope. Then fib_find_info can differentiate the
entries by fib_scope. It means, fib_info will not be reused
if fib_scope is different. For FIB_RES_PREFSRC I assume we
can access fib_info from nh->nh_parent safely when
refreshing nh_saddr.

 	Here is a script that demonstrates the wrong
exposing:

#! /bin/sh -x

dev=eth1

ip link set $dev up
ip addr flush $dev
ip route flush dev $dev

# address that we want to show only to site
ip addr add 1.2.3.4/24 brd + dev $dev scope site

# first route learns 1.2.3.4 as nh_saddr, it is suitable for site
ip route add 5.6.7.0/24 via 1.2.3.6 dev $dev scope site

# our global primary address for universe, bigger subnet
ip addr add 1.2.3.5/23 brd + dev $dev scope global

# second route reuses same fib_info, exposes site address
# 1.2.3.4 to the 4.3.2.0 world
ip route add 4.3.2.0/24 via 1.2.3.6 dev $dev scope global

# show results:

# we must see src 1.2.3.4:
ip route get 5.6.7.8

# we must see src 1.2.3.5. Old kernels show 1.2.3.5, but we
# show 1.2.3.4:
ip route get 4.3.2.1

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ipv4: Cache source address in nexthop entries.
  2011-03-15 10:00       ` Julian Anastasov
@ 2011-03-15 23:02         ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2011-03-15 23:02 UTC (permalink / raw)
  To: ja; +Cc: netdev

From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 15 Mar 2011 12:00:33 +0200 (EET)

> 	I see another problem with nh_saddr. fib_info
> is reused when NHs and some parameters are equal but the
> scope is not part of the fib_info and it is wrong to cache
> route scope in NH or even in fib_info. The problem is
> that we can expose in nh_saddr addresses with wrong
> scope for the route. It happens when two routes with
> different route scope reuse same fib_info.
> 
> 	First thought is that the field fa_scope can be
> moved into fib_scope in struct fib_info, not to use
> nh_cfg_scope. Then fib_find_info can differentiate the
> entries by fib_scope. It means, fib_info will not be reused
> if fib_scope is different. For FIB_RES_PREFSRC I assume we
> can access fib_info from nh->nh_parent safely when
> refreshing nh_saddr.
> 
> 	Here is a script that demonstrates the wrong
> exposing:

Thanks for all of your feedback Julian, I will look into
this very soon.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-03-15 23:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-08  4:58 [PATCH] ipv4: Cache source address in nexthop entries David Miller
2011-03-08  9:57 ` Julian Anastasov
2011-03-08 18:38   ` David Miller
2011-03-08 19:03     ` David Miller
2011-03-15 10:00       ` Julian Anastasov
2011-03-15 23:02         ` David Miller
2011-03-09  0:49     ` Julian Anastasov
2011-03-09 21:08       ` David Miller
2011-03-09 23:32         ` Julian Anastasov
2011-03-09 23:40           ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).