netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] IPv6: Improvement of Source Address Selection
@ 2002-09-27 15:17 YOSHIFUJI Hideaki / 吉藤英明
  2002-09-27 16:02 ` kuznet
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-27 15:17 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: usagi

Hello!

This patch supports the standard default source address selection
algorithm.  It takes the status and the address/prefix itself (prefer same
address, prefer longest matching prefix) into consideration.
Note: Even though label matching is not implemented yet,
      this is better than the current one.

The following patch is against linux-2.4.19.

Thank you in advance.

-------------------------------------------------------------------
Patch-Name: Improvement of Source Address Selection
Patch-Id: FIX_2_4_19_SADDRSELECT-20020906
Patch-Author: YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
Credit: YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
Reference: draft-ietf-ipv6-default-addr-select-09.txt
-------------------------------------------------------------------
Index: include/net/addrconf.h
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.1
diff -u -r1.1.1.1 -r1.1.1.1.6.1
--- include/net/addrconf.h	2002/08/20 09:46:45	1.1.1.1
+++ include/net/addrconf.h	2002/09/26 19:15:15	1.1.1.1.6.1
@@ -55,6 +55,9 @@
 					      struct net_device *dev);
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev);
+extern int			ipv6_dev_get_saddr(struct net_device *ddev,
+						   struct in6_addr *daddr,
+						   struct in6_addr *saddr);
 extern int			ipv6_get_saddr(struct dst_entry *dst, 
 					       struct in6_addr *daddr,
 					       struct in6_addr *saddr);
Index: net/ipv6/addrconf.c
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.4
diff -u -r1.1.1.1 -r1.1.1.1.6.4
--- net/ipv6/addrconf.c	2002/08/20 09:47:02	1.1.1.1
+++ net/ipv6/addrconf.c	2002/09/26 19:28:13	1.1.1.1.6.4
@@ -26,6 +26,10 @@
  *						packets.
  *	yoshfuji@USAGI			:       Fixed interval between DAD
  *						packets.
+ *	YOSHIFUJI Hideaki @USAGI	:	improved source address
+ *						selection; consider scope,
+ *						status etc.
+ *
  */
 
 #include <linux/config.h>
@@ -188,6 +192,99 @@
 	return IPV6_ADDR_RESERVED;
 }
 
+#ifndef IPV6_ADDR_MC_SCOPE
+#define IPV6_ADDR_MC_SCOPE(a)	\
+	((a)->s6_addr[1] & 0x0f)	/* XXX nonstandard */
+#define __IPV6_ADDR_SCOPE_RESERVED	-2
+#define __IPV6_ADDR_SCOPE_ANY		-1
+#define IPV6_ADDR_SCOPE_NODELOCAL	0x01
+#define IPV6_ADDR_SCOPE_LINKLOCAL	0x02
+#define IPV6_ADDR_SCOPE_SITELOCAL	0x05
+#define IPV6_ADDR_SCOPE_ORGLOCAL	0x08
+#define IPV6_ADDR_SCOPE_GLOBAL		0x0e
+#endif
+
+int ipv6_addrselect_scope(const struct in6_addr *addr)
+{
+	u32 st;
+
+	st = addr->s6_addr32[0];
+
+	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
+	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
+		return IPV6_ADDR_SCOPE_GLOBAL;
+
+	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000))
+		return IPV6_ADDR_MC_SCOPE(addr);
+        
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
+		return IPV6_ADDR_SCOPE_LINKLOCAL;
+
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
+		return IPV6_ADDR_SCOPE_SITELOCAL;
+
+	if ((st | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->s6_addr32[3] == 0)
+				return __IPV6_ADDR_SCOPE_ANY;
+
+			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.4 */
+
+			return IPV6_ADDR_SCOPE_GLOBAL;			/* section 2.3 */
+		}
+
+		if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) {
+			if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000))
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
+			if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) {
+				if (addr->s6_addr32[3] == __constant_htonl(0xAC100000))
+					return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
+			}
+			if (addr->s6_addr32[3] == __constant_htonl(0x0A000000))
+				return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+			if (addr->s6_addr32[3] == __constant_htonl(0xC0A80000))
+				return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+
+                        return IPV6_ADDR_SCOPE_GLOBAL;                  /* section 2.2 */
+		}
+	}
+
+	return __IPV6_ADDR_SCOPE_RESERVED;
+}
+
+/* find 1st bit in difference between the 2 addrs */
+static inline int addr_diff(const void *__a1, const void *__a2, int addrlen)
+{
+	/* find 1st bit in difference between the 2 addrs.
+	 * bit may be an invalid value,
+	 * but if it is >= plen, the value is ignored in any case.
+	 */
+	const u32 *a1 = __a1;
+	const u32 *a2 = __a2;
+	int i;
+
+	addrlen >>= 2;
+	for (i = 0; i < addrlen; i++) {
+		u32 xb = a1[i] ^ a2[i];
+		if (xb) {
+			int j = 31;
+			xb = ntohl(xb);
+			while ((xb & (1 << j)) == 0)
+				j--;
+			return (i * 32 + 31 - j);
+		}
+	}
+	return addrlen<<5;
+}
+
+static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	 return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr));
+}
+
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 {
 	if (del_timer(&ifp->timer))
@@ -449,120 +546,137 @@
 
 /*
  *	Choose an apropriate source address
- *	should do:
- *	i)	get an address with an apropriate scope
- *	ii)	see if there is a specific route for the destination and use
- *		an address of the attached interface 
- *	iii)	don't use deprecated addresses
+ *	draft-ietf-ipngwg-default-addr-select-09.txt
  */
-int ipv6_get_saddr(struct dst_entry *dst,
-		   struct in6_addr *daddr, struct in6_addr *saddr)
+struct addrselect_attrs {
+	struct inet6_ifaddr *ifp;
+	int	match;
+	int	deprecated;
+	int	home;
+	int	temporary;
+	int	device;
+	int	scope;
+	int	label;
+	int	matchlen;
+};
+
+int ipv6_dev_get_saddr(struct net_device *daddr_dev,
+		       struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	int scope;
-	struct inet6_ifaddr *ifp = NULL;
-	struct inet6_ifaddr *match = NULL;
-	struct net_device *dev = NULL;
+	int daddr_scope;
+	struct inet6_ifaddr *ifp0, *ifp = NULL;
+	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct rt6_info *rt;
-	int err;
 
-	rt = (struct rt6_info *) dst;
-	if (rt)
-		dev = rt->rt6i_dev;
-
-	scope = ipv6_addr_scope(daddr);
-	if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
-		/*
-		 *	route for the "all destinations on link" rule
-		 *	when no routers are present
-		 */
-		scope = IFA_LINK;
-	}
-
-	/*
-	 *	known dev
-	 *	search dev and walk through dev addresses
-	 */
+	int err;
+	int update;
+	struct addrselect_attrs candidate = {NULL,0,0,0,0,0,0,0,0};
 
-	if (dev) {
-		if (dev->flags & IFF_LOOPBACK)
-			scope = IFA_HOST;
+	daddr_scope = ipv6_addrselect_scope(daddr);
 
-		read_lock(&addrconf_lock);
+	read_lock(&dev_base_lock);
+	read_lock(&addrconf_lock);
+	for (dev = dev_base; dev; dev=dev->next) {
 		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						read_unlock(&addrconf_lock);
-						goto out;
-					}
-
-					if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
+
+		if (!idev)
+			continue;
+
+		read_lock_bh(&idev->lock);
+		ifp0 = idev->addr_list;
+		for (ifp=ifp0; ifp; ifp=ifp->if_next) {
+			struct addrselect_attrs temp = {NULL,0,0,0,0,0,0,0,0};
+			update = 0;
+
+			/* Rule 1: Prefer same address */
+			temp.match = ipv6_addr_cmp(&ifp->addr, daddr) == 0;
+			if (!update)
+				update = temp.match - candidate.match;
+			if (update < 0) {
+				continue;
+			}
+
+			/* Rule 2: Prefer appropriate scope */
+			temp.scope = ipv6_addrselect_scope(&ifp->addr);
+			if (!update) {
+				update = temp.scope - candidate.scope;
+				if (update > 0) {
+					update = candidate.scope < daddr_scope ? 1 : -1;
+				} else if (update < 0) {
+					update = temp.scope < daddr_scope ? -1 : 1;
 				}
 			}
-			read_unlock_bh(&idev->lock);
-		}
-		read_unlock(&addrconf_lock);
-	}
+			if (update < 0) {
+				continue;
+			}
 
-	if (scope == IFA_LINK)
-		goto out;
+			/* Rule 3: Avoid deprecated address */
+			temp.deprecated = ifp->flags & IFA_F_DEPRECATED;
+			if (!update)
+				update = candidate.deprecated - temp.deprecated;
+			if (update < 0) {
+				continue;
+			}
 
-	/*
-	 *	dev == NULL or search failed for specified dev
-	 */
+			/* XXX: Rule 4: Prefer home address */
 
-	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
-	for (dev = dev_base; dev; dev=dev->next) {
-		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						goto out_unlock_base;
-					}
-
-					if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
-				}
+			/* Rule 5: Prefer outgoing interface */
+			temp.device = daddr_dev ? daddr_dev == (ifp->idev ? ifp->idev->dev : daddr_dev) : 0;
+			if (!update)
+				update = temp.device - candidate.device;
+			if (update < 0) {
+				continue;
+			}
+
+			/* XXX: Rule 6: Prefer matching label */
+			temp.label = 0;
+			if (!update)
+				update = temp.label - candidate.label;
+			if (update < 0) {
+				continue;
 			}
-			read_unlock_bh(&idev->lock);
+
+			/* XXX: Rule 7: Prefer public address */
+
+			/* Rule 8: Use longest matching prefix */
+			temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr);
+			if (!update)
+				update = temp.matchlen - candidate.matchlen;
+			if (update < 0) {
+				continue;
+			}
+
+			/* Final Rule */
+			if (update <= 0)
+				continue;
+
+			/* update candidate */
+			temp.ifp = ifp;
+			in6_ifa_hold(ifp);
+			if (candidate.ifp)
+				in6_ifa_put(candidate.ifp);
+			candidate = temp;
 		}
+		read_unlock_bh(&idev->lock);
 	}
-
-out_unlock_base:
 	read_unlock(&addrconf_lock);
 	read_unlock(&dev_base_lock);
-
-out:
-	if (ifp == NULL) {
-		ifp = match;
-		match = NULL;
-	}
 
-	err = -EADDRNOTAVAIL;
-	if (ifp) {
-		ipv6_addr_copy(saddr, &ifp->addr);
+	if (candidate.ifp) {
+		ipv6_addr_copy(saddr, &candidate.ifp->addr);
+		in6_ifa_put(candidate.ifp);
 		err = 0;
-		in6_ifa_put(ifp);
+	} else {
+		err = -EADDRNOTAVAIL;
 	}
-	if (match)
-		in6_ifa_put(match);
-
 	return err;
+}
+
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_dev : NULL,
+				  daddr, saddr);
 }
 
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明
@ 2002-09-27 16:02 ` kuznet
  2002-09-27 16:28   ` Pekka Savola
  2002-09-28  1:28 ` David S. Miller
  2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明
  2 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-27 16:02 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev

Hello!

> This patch supports standard default source address selection
> algorithm.

As far as I remember, we had a long discussion about this ages ago.
I said I hate this. Such complicated selection without caching is a _bug_.
I see nothing improved since that time, except that the function became
even more hairy. :-)

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-27 16:02 ` kuznet
@ 2002-09-27 16:28   ` Pekka Savola
  2002-09-27 16:55     ` kuznet
  0 siblings, 1 reply; 27+ messages in thread
From: Pekka Savola @ 2002-09-27 16:28 UTC (permalink / raw)
  To: kuznet; +Cc: YOSHIFUJI Hideaki / 吉藤英明, netdev

On Fri, 27 Sep 2002 kuznet@ms2.inr.ac.ru wrote:
> > This patch supports standard default source address selection
> > algorithm.
> 
> As far as I remember, we had a long discussion about this ages ago.
> I said I hate this. Such complicated selection without caching is a _bug_.
> I see nothing improved since that time, except that the function became
> even more hairy. :-)

But you agree that a new selection is important, I think?

I agree that the spec as written (like, each address against every other, 
iterate N times etc.) seems to be like total crap.. but at least the 
intent seems to be clear-ish.

If caching was implemented I guess it would be triggered by:
 - address changes
 - route changes
 - a maximum lifetime of xx seconds?

Caching, if it can be done simply and reasonably seems like a very good
idea to me.

Btw I think labels are quite an important component of selection rules, as
it (similar to longest matching prefix) keeps certain classes of addresses
(e.g. 6to4, mapped addresses, compatible etc.) within the label.  That's
important.

User-manageable policy table is of less importance I think.

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-27 16:28   ` Pekka Savola
@ 2002-09-27 16:55     ` kuznet
  0 siblings, 0 replies; 27+ messages in thread
From: kuznet @ 2002-09-27 16:55 UTC (permalink / raw)
  To: Pekka Savola; +Cc: yoshfuji, netdev

Hello!

> But you agree that a new selection is important, I think?

It is the thing which would be silly to disagree, people want this. :-)

I do not want a bogus implementation blocking attempts to select an address
in O(1) time.

> If caching was implemented I guess it would be triggered by:

It is just cached in routes, as IP does it. There is nothing sophisticated
there; the logic is aligned with the logic of the routing cache.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明
  2002-09-27 16:02 ` kuznet
@ 2002-09-28  1:28 ` David S. Miller
  2002-09-28  2:28   ` kuznet
  2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明
  2 siblings, 1 reply; 27+ messages in thread
From: David S. Miller @ 2002-09-28  1:28 UTC (permalink / raw)
  To: yoshfuji; +Cc: linux-kernel, netdev, usagi, kuznet

   From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
   Date: Sat, 28 Sep 2002 00:17:42 +0900 (JST)

Please redesign this structure.

   +struct addrselect_attrs {
   +	struct inet6_ifaddr *ifp;
   +	int	match;
   +	int	deprecated;
   +	int	home;
   +	int	temporary;
   +	int	device;
   +	int	scope;
   +	int	label;
   +	int	matchlen;
   +};

This is much larger than it needs to be.  Please replace these "int"
binary states with single "u32 flags;" and appropriate bit
definitions.

This structure sits on the stack, so it is important to be
as small as we can easily make it.

Otherwise I have no problems with the patch, Alexey?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  1:28 ` David S. Miller
@ 2002-09-28  2:28   ` kuznet
  2002-09-28  2:34     ` Andi Kleen
  2002-09-28  2:35     ` David S. Miller
  0 siblings, 2 replies; 27+ messages in thread
From: kuznet @ 2002-09-28  2:28 UTC (permalink / raw)
  To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi

Hello!

> Otherwise I have no problems with the patch, Alexey?

I have... The implementation is bad. The source address must be retrieved
from the route, not by running this elephant function for each packet.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  2:28   ` kuznet
@ 2002-09-28  2:34     ` Andi Kleen
  2002-09-28  2:35     ` David S. Miller
  1 sibling, 0 replies; 27+ messages in thread
From: Andi Kleen @ 2002-09-28  2:34 UTC (permalink / raw)
  To: kuznet; +Cc: David S. Miller, yoshfuji, linux-kernel, netdev, usagi

On Sat, Sep 28, 2002 at 06:28:29AM +0400, A.N.Kuznetsov wrote:
> Hello!
> 
> > Otherwise I have no problems with the patch, Alexey?
> 
> I have... The implementation is bad. The source address must be retrieved
> from the route, not by running this elephant function for each packet.

So it just needs to be moved into ip_route_output, right ? 

-Andi

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  2:28   ` kuznet
  2002-09-28  2:34     ` Andi Kleen
@ 2002-09-28  2:35     ` David S. Miller
  2002-09-28  2:58       ` kuznet
  1 sibling, 1 reply; 27+ messages in thread
From: David S. Miller @ 2002-09-28  2:35 UTC (permalink / raw)
  To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi

   From: kuznet@ms2.inr.ac.ru
   Date: Sat, 28 Sep 2002 06:28:29 +0400 (MSD)

   > Otherwise I have no problems with the patch, Alexey?
   
   I have... The implementation is bad. The source address must be retrieved
   from the route, not by running this elephant function for each packet.
   
This only runs at connect time, and when NULL fl->fl6_src is seen
by ip6_build_xmit() (this means RAW,UDP,ICMP which must make these
decisions anyways).

Is there really so much computation to be saved by moving this
to ipv6 route?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  2:58       ` kuznet
@ 2002-09-28  2:55         ` David S. Miller
  2002-09-28  3:38           ` kuznet
  0 siblings, 1 reply; 27+ messages in thread
From: David S. Miller @ 2002-09-28  2:55 UTC (permalink / raw)
  To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi

   From: kuznet@ms2.inr.ac.ru
   Date: Sat, 28 Sep 2002 06:58:22 +0400 (MSD)

   > This only runs at connect time
   
   ... and also at ip6_build_xmit(). Connected dgram sockets are marginal.

I said UDP/RAW.  At least believe that I am this smart :-)

Point is that current function is not tiny either, so improvement you
suggest applies both to current code and code after Yoshi's change.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  2:35     ` David S. Miller
@ 2002-09-28  2:58       ` kuznet
  2002-09-28  2:55         ` David S. Miller
  0 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-28  2:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi

Hello!

> This only runs at connect time

... and also at ip6_build_xmit(). Connected dgram sockets are marginal.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  3:38           ` kuznet
@ 2002-09-28  3:36             ` David S. Miller
  2002-09-28  4:19               ` kuznet
  0 siblings, 1 reply; 27+ messages in thread
From: David S. Miller @ 2002-09-28  3:36 UTC (permalink / raw)
  To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi

   From: kuznet@ms2.inr.ac.ru
   Date: Sat, 28 Sep 2002 07:38:09 +0400 (MSD)

   Now I see a retransmission of practically the same patch, which was deferred
   for improvement at that time.

Ok, Yoshi, please work with Alexey to put source address selection into the
right place and remove ipv6_get_saddr().

Alexey, I still am not clear, this belongs in the output routing logic
right?  You dance in circles talking about this patch, that patch,
but I cannot decode this into an answer to the question of where
source address selection belongs.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  2:55         ` David S. Miller
@ 2002-09-28  3:38           ` kuznet
  2002-09-28  3:36             ` David S. Miller
  0 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-28  3:38 UTC (permalink / raw)
  To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi

Hello!

> suggest applies both to current code and code after Yoshi's change.

This is wrong, unfortunately. The elimination of ipv6_get_saddr()
was trivial before this patch (because the preferred source did not depend
on the real destination, only on scope); the corresponding fix was withdrawn
from 2.4 only for the sake of this feature, pending as a well-known patch.
Now I see a retransmission of practically the same patch, which was deferred
for improvement at that time.

Citing myself two years younger:

> The first priority task is to eliminate address selection function.
> 
> Without this odd feature it was easy and, in fact, address selection
> patches forced me to withdraw the solution from kernel, because
> it makes these hacks much more difficult.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  3:36             ` David S. Miller
@ 2002-09-28  4:19               ` kuznet
  2002-09-28  4:30                 ` YOSHIFUJI Hideaki / 吉藤英明
  2002-09-28  4:35                 ` Pekka Savola
  0 siblings, 2 replies; 27+ messages in thread
From: kuznet @ 2002-09-28  4:19 UTC (permalink / raw)
  To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi

Hello!

> Alexey, I still am not clear, this belongs in the output routing logic
> right?
...
> where source address selection belongs.

Yes, it naturally belongs to the time when route is created.

This is just extending the ipv6 routing entry with a field to hold the
source address and, generally, making it work the same way as IPv4 does,
with all the advantages, particularly the capability to select a preferred
source address via routes set up by the admin (RTA_PREFSRC attribute,
"src" in "ip route add").

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  4:19               ` kuznet
@ 2002-09-28  4:30                 ` YOSHIFUJI Hideaki / 吉藤英明
  2002-09-28  4:44                   ` kuznet
  2002-09-28  4:35                 ` Pekka Savola
  1 sibling, 1 reply; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28  4:30 UTC (permalink / raw)
  To: kuznet; +Cc: davem, linux-kernel, netdev, usagi

In article <200209280419.IAA02894@sex.inr.ac.ru> (at Sat, 28 Sep 2002 08:19:29 +0400 (MSD)), kuznet@ms2.inr.ac.ru says:

> This is just extending the ipv6 routing entry with a field to hold the
> source address and, generally, making it work the same way as IPv4 does,
> with all the advantages, particularly the capability to select a preferred
> source address via routes set up by the admin (RTA_PREFSRC attribute,
> "src" in "ip route add").

we need per socket preference.
can we do that with this?

-- 
Hideaki YOSHIFUJI @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  4:19               ` kuznet
  2002-09-28  4:30                 ` YOSHIFUJI Hideaki / 吉藤英明
@ 2002-09-28  4:35                 ` Pekka Savola
  2002-09-28  5:00                   ` kuznet
  1 sibling, 1 reply; 27+ messages in thread
From: Pekka Savola @ 2002-09-28  4:35 UTC (permalink / raw)
  To: kuznet; +Cc: David S. Miller, yoshfuji, linux-kernel, netdev, usagi

On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote:
> Hello!
> 
> > Alexey, I still am not clear, this belongs in the output routing logic
> > right?
> ...
> > where source address selection belongs.
> 
> Yes, it naturally belongs to the time when route is created.
> 
> This is just extending the ipv6 routing entry with a field to hold the
> source address and, generally, making it work the same way as IPv4 does,
> with all the advantages, particularly the capability to select a preferred
> source address via routes set up by the admin (RTA_PREFSRC attribute,
> "src" in "ip route add").

Umm.. you sure?

Isn't putting this logic to routes an oversimplification?

Consider e.g. a dummy host which only has a few addresses (link-local,
site-local, global; the last two /64's) and, basically, a default route
(plus of course interface routes for those /64's).

When talking to other subnets within the site (ie. those not on the /64)  
one would have difficulties parsing the source address from the default
route, as there would have to be at least two candidates there.

Am I missing something obvious here?

Alexey's approach should work in some simpler cases, but maybe not all
(stuff that's network prefix -independent like home addresses, privacy
addresses etc. would be different).

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  4:30                 ` YOSHIFUJI Hideaki / 吉藤英明
@ 2002-09-28  4:44                   ` kuznet
  2002-09-28  5:14                     ` YOSHIFUJI Hideaki / 吉藤英明
  0 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-28  4:44 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明
  Cc: davem, linux-kernel, netdev, usagi

Hello!

> we need per socket preference.

What kind of? Some matching rules loaded to socket by user?

Anyway, rules established by a particular client should be separate,
it is just a generalization of bind()/IP{V6}_PKTINFO.

I am not sure that it is really interesting though. Just now I cannot
imagine what user can invent which is not covered by system-wide rules,
bind() and IP{V6}_PKTINFO. Well, if you think more hairy scheme is interesting,
feel free to implement this.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  4:35                 ` Pekka Savola
@ 2002-09-28  5:00                   ` kuznet
  2002-09-28  5:24                     ` Pekka Savola
  0 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-28  5:00 UTC (permalink / raw)
  To: Pekka Savola; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi

Hello!

> Isn't putting this logic to routes an oversimplification?

Hmmm... I believed this logic is more complicated yet. :-)


> route, as there would have to be at least two candidates there.
...
> Am I missing something obvious here?

Yes. You select some one of the candidates eventually, do not you? :-)
And when you have some special preference for a subnet you create
a route for it.

> (stuff that's network prefix -independent

I am sorry, I feel I do not understand what you mean.

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  4:44                   ` kuznet
@ 2002-09-28  5:14                     ` YOSHIFUJI Hideaki / 吉藤英明
  2002-09-28  5:26                       ` kuznet
  0 siblings, 1 reply; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28  5:14 UTC (permalink / raw)
  To: usagi, kuznet; +Cc: davem, linux-kernel, netdev

In article <200209280444.IAA02959@sex.inr.ac.ru> (at Sat, 28 Sep 2002 08:44:29 +0400 (MSD)), kuznet@ms2.inr.ac.ru says:

> I am not sure that it is really interesting though. Just now I cannot
> imagine what user can invent which is not covered by system-wide rules,
> bind() and IP{V6}_PKTINFO. Well, if you think more hairy scheme is interesting,
> feel free to implement this.

we need per application (per socket) interface
for privacy extension (public address vs temporary address) and 
mobile ip (home address vs care-of address).

--
yoshfuji

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  5:00                   ` kuznet
@ 2002-09-28  5:24                     ` Pekka Savola
  2002-09-28  5:37                       ` kuznet
  0 siblings, 1 reply; 27+ messages in thread
From: Pekka Savola @ 2002-09-28  5:24 UTC (permalink / raw)
  To: kuznet; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi

On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote:
> > route, as there would have to be at least two candidates there.
> ...
> > Am I missing something obvious here?
> 
> Yes. You select some one of the candidates eventually, do not you? :-)

But can there be more candidates for one route, in which case one would 
run something similar to this algorithm then?

Or would you have an already-sorted list of possible candidate addresses 
for each route in the order of preference?  And recalculate always when 
address changes?

Or..?

> And when you have some special preference for a subnet you create
> a route for it.

This is IMO a wrong approach from user's perspective.  Perhaps not if the 
algorithm was run and e.g. additional, temporary "address selection" 
routes were created by kernel.
 
> > (stuff that's network prefix -independent
> 
> I am sorry, I feel I do not understand what you mean.

Hmm.. this depends on the interpretation of the concept above.  If the
list is refreshed always when addresses change or change state, this could
perhaps work..

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  5:14                     ` YOSHIFUJI Hideaki / 吉藤英明
@ 2002-09-28  5:26                       ` kuznet
  0 siblings, 0 replies; 27+ messages in thread
From: kuznet @ 2002-09-28  5:26 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明
  Cc: usagi, davem, linux-kernel, netdev

Hello!

> we need per application (per socket) interface
> for privacy extension (public address vs temporary address) and 
> mobile ip (home address vs care-of address).

OK. It is natural user-friendly generalization of bind(). I do not see
problems.

Though, please explain, to avoid misunderstanding. Let's take the second
case for simplicity. Is it true that a "home or care-of" switch is supposed
to be added to each application? This sounds strange enough,
taking into account that only a few applications have a switch like -b
in openssh, even though plain bind() is as old as the internet. :-)

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  5:24                     ` Pekka Savola
@ 2002-09-28  5:37                       ` kuznet
  2002-09-29  8:41                         ` Pekka Savola
  0 siblings, 1 reply; 27+ messages in thread
From: kuznet @ 2002-09-28  5:37 UTC (permalink / raw)
  To: Pekka Savola; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi

Hello!

> Or would you have an already-sorted list of possible candidate addresses 
> for each route in the order of preference?

I am not mad yet. :-)

What preference? You must select _one_ address, you do not need lost
candidates.


> And recalculate always when address changes?

What address? Interface address? Routing tables used to be synchronized
to this.


> This is IMO a wrong approach from user's perspective.  Perhaps not if the 
> algorithm was run and e.g. additional, temporary "address selection" 
> routes were created by kernel.
>  
> > > (stuff that's network prefix -independent
> > 
> > I am sorry, I feel I do not understand what you mean.
> 
> Hmm.. this depends on the interpretation of the concept above.  If the
> list is refreshed always when addresses change or change state, this could
> perhaps work..

I am afraid I do not understand what "address", "state", "temporary" routes
etc you mean. It remained in your brains. :-)

Pekka, are you not going to sleep? (I am.) I bet when you reread this tomorrow,
you will not blame that my brains eventually falled to "parse error" loop. :-)

Alexey

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-28  5:37                       ` kuznet
@ 2002-09-29  8:41                         ` Pekka Savola
  0 siblings, 0 replies; 27+ messages in thread
From: Pekka Savola @ 2002-09-29  8:41 UTC (permalink / raw)
  To: kuznet; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi

On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote:
> > Or would you have an already-sorted list of possible candidate addresses 
> > for each route in the order of preference?
> 
> I am not mad yet. :-)
> 
> What preference? You must select _one_ address, you do not need lost
> candidates.

In the case the first entry goes away, having a list could help in being able
to pick the next one to use very easily.  But this is probably just an
implementation detail.

> > And recalculate always when address changes?
> 
> What address? Interface address? Routing tables used to be synchronized
> to this.

Any address.

One notable case is that the outgoing interface has only link/site-local 
addresses and the destination is global.  There are other cases too.

> > This is IMO a wrong approach from user's perspective.  Perhaps not if the 
> > algorithm was run and e.g. additional, temporary "address selection" 
> > routes were created by kernel.
> >  
> > > > (stuff that's network prefix -independent
> > > 
> > > I am sorry, I feel I do not understand what you mean.
> > 
> > Hmm.. this depends on the interpretation of the concept above.  If the
> > list is refreshed always when addresses change or change state, this could
> > perhaps work..
> 
> I am afraid I do not understand what "address", "state", "temporary" routes
> etc you mean. It remained in your brains. :-)
> 
> Pekka, are you not going to sleep? (I am.) I bet when you reread this tomorrow,
> you will not blame that my brains eventually falled to "parse error" loop. :-)

I had already woken up :-).

At least BSD and I think Linux create ad-hoc, "cloned" routes e.g. in Path
MTU discovery process to hold some different values.  I don't remember the 
details.  I was wondering if this would be done the same or not.

change state = move to deprecated, move to non-deprecated.

Hope this clarifies.

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明
  2002-09-27 16:02 ` kuznet
  2002-09-28  1:28 ` David S. Miller
@ 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明
  2002-10-04  6:32   ` Pekka Savola
  2 siblings, 1 reply; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-03 16:50 UTC (permalink / raw)
  To: netdev; +Cc: usagi

In article <20020928.001742.125874265.yoshfuji@linux-ipv6.org> (at Sat, 28 Sep 2002 00:17:42 +0900 (JST)), YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> says:

> This patch supports standard default source address selection
> algorithm.  It takes status, address/prefix itself (prefer same address,
> prefer longest matching prefix) into consideration.
> Note: Even though matching label is not implemented yet,
>       this is better than current one.
> 
> Following patch is against linux-2.4.19.

This patch is a revised version.
I think we have more things to be done, but anyways,

 - save memory (comment from davem)
 - introduced (static) policy label (comment from pekkas)

Thanks in advance.

------
Index: include/net/addrconf.h
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.1
diff -u -r1.1.1.1 -r1.1.1.1.6.1
--- include/net/addrconf.h	2002/08/20 09:46:45	1.1.1.1
+++ include/net/addrconf.h	2002/09/26 19:15:15	1.1.1.1.6.1
@@ -55,6 +55,9 @@
 					      struct net_device *dev);
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev);
+extern int			ipv6_dev_get_saddr(struct net_device *ddev,
+						   struct in6_addr *daddr,
+						   struct in6_addr *saddr);
 extern int			ipv6_get_saddr(struct dst_entry *dst, 
 					       struct in6_addr *daddr,
 					       struct in6_addr *saddr);
Index: net/ipv6/addrconf.c
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.6
diff -u -r1.1.1.1 -r1.1.1.1.6.6
--- net/ipv6/addrconf.c	2002/08/20 09:47:02	1.1.1.1
+++ net/ipv6/addrconf.c	2002/10/03 03:28:33	1.1.1.1.6.6
@@ -26,6 +26,10 @@
  *						packets.
  *	yoshfuji@USAGI			:       Fixed interval between DAD
  *						packets.
+ *	YOSHIFUJI Hideaki @USAGI	:	improved source address
+ *						selection; consider scope,
+ *						status etc.
+ *
  */
 
 #include <linux/config.h>
@@ -104,6 +108,8 @@
 
 static struct notifier_block *inet6addr_chain;
 
+static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, int ifindex);
+
 struct ipv6_devconf ipv6_devconf =
 {
 	0,				/* forwarding		*/
@@ -188,6 +194,99 @@
 	return IPV6_ADDR_RESERVED;
 }
 
+#ifndef IPV6_ADDR_MC_SCOPE
+#define IPV6_ADDR_MC_SCOPE(a)	\
+	((a)->s6_addr[1] & 0x0f)	/* XXX nonstandard */
+#define __IPV6_ADDR_SCOPE_RESERVED	-2
+#define __IPV6_ADDR_SCOPE_ANY		-1
+#define IPV6_ADDR_SCOPE_NODELOCAL	0x01
+#define IPV6_ADDR_SCOPE_LINKLOCAL	0x02
+#define IPV6_ADDR_SCOPE_SITELOCAL	0x05
+#define IPV6_ADDR_SCOPE_ORGLOCAL	0x08
+#define IPV6_ADDR_SCOPE_GLOBAL		0x0e
+#endif
+
+int ipv6_addrselect_scope(const struct in6_addr *addr)
+{
+	u32 st;
+
+	st = addr->s6_addr32[0];
+
+	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
+	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
+		return IPV6_ADDR_SCOPE_GLOBAL;
+
+	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000))
+		return IPV6_ADDR_MC_SCOPE(addr);
+        
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
+		return IPV6_ADDR_SCOPE_LINKLOCAL;
+
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
+		return IPV6_ADDR_SCOPE_SITELOCAL;
+
+	if ((st | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->s6_addr32[3] == 0)
+				return __IPV6_ADDR_SCOPE_ANY;
+
+			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.4 */
+
+			return IPV6_ADDR_SCOPE_GLOBAL;			/* section 2.3 */
+		}
+
+		if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) {
+			if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000))
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
+			if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) {
+				if (addr->s6_addr32[3] == __constant_htonl(0xAC100000))
+					return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
+			}
+			if (addr->s6_addr32[3] == __constant_htonl(0x0A000000))
+				return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+			if (addr->s6_addr32[3] == __constant_htonl(0xC0A80000))
+				return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
+
+                        return IPV6_ADDR_SCOPE_GLOBAL;                  /* section 2.2 */
+		}
+	}
+
+	return __IPV6_ADDR_SCOPE_RESERVED;
+}
+
+/* find 1st bit in difference between the 2 addrs */
+static inline int addr_diff(const void *__a1, const void *__a2, int addrlen)
+{
+	/* find 1st bit in difference between the 2 addrs.
+	 * bit may be an invalid value,
+	 * but if it is >= plen, the value is ignored in any case.
+	 */
+	const u32 *a1 = __a1;
+	const u32 *a2 = __a2;
+	int i;
+
+	addrlen >>= 2;
+	for (i = 0; i < addrlen; i++) {
+		u32 xb = a1[i] ^ a2[i];
+		if (xb) {
+			int j = 31;
+			xb = ntohl(xb);
+			while ((xb & (1 << j)) == 0)
+				j--;
+			return (i * 32 + 31 - j);
+		}
+	}
+	return addrlen<<5;
+}
+
+static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	 return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr));
+}
+
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 {
 	if (del_timer(&ifp->timer))
@@ -449,122 +548,160 @@
 
 /*
  *	Choose an apropriate source address
- *	should do:
- *	i)	get an address with an apropriate scope
- *	ii)	see if there is a specific route for the destination and use
- *		an address of the attached interface 
- *	iii)	don't use deprecated addresses
+ *	draft-ietf-ipngwg-default-addr-select-09.txt
  */
-int ipv6_get_saddr(struct dst_entry *dst,
-		   struct in6_addr *daddr, struct in6_addr *saddr)
+#define IPV6_SADDRSELECT_SELF		0x01
+#define IPV6_SADDRSELECT_PREFERRED	0x02
+#define IPV6_SADDRSELECT_HOME		0x04
+#define IPV6_SADDRSELECT_PUBLIC		0x08
+#define IPV6_SADDRSELECT_INTERFACE	0x10
+#define IPV6_SADDRSELECT_LABEL		0x20
+
+struct addrselect_attrs {
+	struct inet6_ifaddr *ifp;
+	u16	flags;
+	s16	matchlen;
+	u8	scope;
+};
+
+int ipv6_dev_get_saddr(struct net_device *daddr_dev,
+		       struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	int scope;
-	struct inet6_ifaddr *ifp = NULL;
-	struct inet6_ifaddr *match = NULL;
-	struct net_device *dev = NULL;
+	int daddr_scope;
+	u32 daddr_label;
+	struct inet6_ifaddr *ifp0, *ifp = NULL;
+	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct rt6_info *rt;
-	int err;
 
-	rt = (struct rt6_info *) dst;
-	if (rt)
-		dev = rt->rt6i_dev;
-
-	scope = ipv6_addr_scope(daddr);
-	if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
-		/*
-		 *	route for the "all destinations on link" rule
-		 *	when no routers are present
-		 */
-		scope = IFA_LINK;
-	}
-
-	/*
-	 *	known dev
-	 *	search dev and walk through dev addresses
-	 */
+	int err;
+	int update;
+	struct addrselect_attrs candidate = {NULL,0,0};
 
-	if (dev) {
-		if (dev->flags & IFF_LOOPBACK)
-			scope = IFA_HOST;
+	daddr_scope = ipv6_addrselect_scope(daddr);
+	daddr_label = ipv6_addrselect_label_lookup(daddr, 
+						   daddr_dev?daddr_dev->ifindex:0);
 
-		read_lock(&addrconf_lock);
+	read_lock(&dev_base_lock);
+	read_lock(&addrconf_lock);
+	for (dev = dev_base; dev; dev=dev->next) {
 		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						read_unlock(&addrconf_lock);
-						goto out;
-					}
-
-					if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
+
+		if (!idev)
+			continue;
+
+		read_lock_bh(&idev->lock);
+		ifp0 = idev->addr_list;
+		for (ifp=ifp0; ifp; ifp=ifp->if_next) {
+			struct addrselect_attrs temp = {NULL,0,0};
+			update = 0;
+
+			/* Rule 1: Prefer same address */
+			if (ipv6_addr_cmp(&ifp->addr, daddr) == 0)
+				temp.flags |= IPV6_SADDRSELECT_SELF;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_SELF;
+				update = (temp.flags&IPV6_SADDRSELECT_SELF) -
+					 (candidate.flags&IPV6_SADDRSELECT_SELF);
+			if (update < 0) {
+				continue;
+			}
+
+			/* Rule 2: Prefer appropriate scope */
+			temp.scope = ipv6_addrselect_scope(&ifp->addr);
+			if (!update) {
+				update = temp.scope - candidate.scope;
+				if (update > 0) {
+					update = candidate.scope < daddr_scope ? 1 : -1;
+				} else if (update < 0) {
+					update = temp.scope < daddr_scope ? -1 : 1;
 				}
 			}
-			read_unlock_bh(&idev->lock);
-		}
-		read_unlock(&addrconf_lock);
-	}
+			if (update < 0) {
+				continue;
+			}
 
-	if (scope == IFA_LINK)
-		goto out;
+			/* Rule 3: Avoid deprecated address */
+			if (!(ifp->flags & IFA_F_DEPRECATED))
+				temp.flags |= IPV6_SADDRSELECT_PREFERRED;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_PREFERRED;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_PREFERRED) -
+					 (candidate.flags&IPV6_SADDRSELECT_PREFERRED);
+			if (update < 0) {
+				continue;
+			}
 
-	/*
-	 *	dev == NULL or search failed for specified dev
-	 */
+			/* XXX: Rule 4: Prefer home address */
 
-	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
-	for (dev = dev_base; dev; dev=dev->next) {
-		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						goto out_unlock_base;
-					}
-
-					if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
-				}
+			/* Rule 5: Prefer outgoing interface */
+			if (daddr_dev == NULL || ifp->idev == NULL ||
+			    daddr_dev == ifp->idev->dev)
+				temp.flags |= IPV6_SADDRSELECT_INTERFACE;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_INTERFACE;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_INTERFACE) -
+					 (candidate.flags&IPV6_SADDRSELECT_INTERFACE);
+			if (update < 0) {
+				continue;
 			}
-			read_unlock_bh(&idev->lock);
+
+			/* XXX: Rule 6: Prefer matching label */
+			if (ipv6_addrselect_label_lookup(&ifp->addr, dev->ifindex) == daddr_label)
+				temp.flags |= IPV6_SADDRSELECT_LABEL;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_LABEL;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_LABEL) -
+					 (candidate.flags&IPV6_SADDRSELECT_LABEL);
+			if (update < 0) {
+				continue;
+			}
+
+			/* XXX: Rule 7: Prefer public address */
+
+			/* Rule 8: Use longest matching prefix */
+			temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr);
+			if (!update)
+				update = temp.matchlen - candidate.matchlen;
+			if (update < 0) {
+				continue;
+			}
+
+			/* Final Rule */
+			if (update <= 0)
+				continue;
+
+			/* update candidate */
+			temp.ifp = ifp;
+			in6_ifa_hold(ifp);
+			if (candidate.ifp)
+				in6_ifa_put(candidate.ifp);
+			candidate = temp;
 		}
+		read_unlock_bh(&idev->lock);
 	}
-
-out_unlock_base:
 	read_unlock(&addrconf_lock);
 	read_unlock(&dev_base_lock);
-
-out:
-	if (ifp == NULL) {
-		ifp = match;
-		match = NULL;
-	}
 
-	err = -EADDRNOTAVAIL;
-	if (ifp) {
-		ipv6_addr_copy(saddr, &ifp->addr);
+	if (candidate.ifp) {
+		ipv6_addr_copy(saddr, &candidate.ifp->addr);
+		in6_ifa_put(candidate.ifp);
 		err = 0;
-		in6_ifa_put(ifp);
+	} else {
+		err = -EADDRNOTAVAIL;
 	}
-	if (match)
-		in6_ifa_put(match);
-
 	return err;
 }
 
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_dev : NULL,
+				  daddr, saddr);
+}
+
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
@@ -636,6 +773,69 @@
 	read_unlock_bh(&addrconf_hash_lock);
 
 	return ifp;
+}
+
+/* address selection: default policy label */
+/* XXX: user level configuration */
+static struct ipv6_addrselect_label {
+	struct in6_addr addr;
+	u16	plen;
+	u32	ifindex;
+	u32	label;
+} ipv6_addrselect_label_table[] = {
+	/* ::1/128, label = 0 */
+	{
+		.addr = {{{ [15] = 1 }}},
+		.plen = 128,
+		.label = 0,
+	},
+	/* ::/0, label = 1 */
+	{
+		.plen = 0,
+		.label = 1,
+	},
+	/* 2002::/16, label = 2 */
+	{
+		.addr = {{{ 0x20, 0x02 }}},
+		.plen = 16,
+		.label = 2,
+	},
+	/* ::/96, label = 3 */
+	{
+		.plen = 96,
+		.label = 3,
+	},
+	/* ::ffff:0:0/96, label = 4 */
+	{
+		.addr = {{{ [10] = 0xff, [11] = 0xff }}},
+		.plen = 96,
+		.label = 4,
+	},
+	/* sentinel */
+	{
+		.label = 0xffffffff,
+	}
+};
+
+static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, 
+					int ifindex)
+{
+	struct ipv6_addrselect_label *p;
+	int plen, matchlen = -1;
+	u32 label = 0xffffffff;
+
+	for (p = ipv6_addrselect_label_table;
+	     p->label != 0xffffffff;
+	     p++) {
+		if (ifindex && p->ifindex && ifindex != p->ifindex)
+			continue;
+		plen = ipv6_addr_diff(addr, &p->addr);
+		if (plen < p->plen || plen < matchlen)
+			continue;
+		matchlen = plen;
+		label = p->label;
+	}
+	return label;
 }
 
 /* Gets referenced address, destroys ifaddr */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明
@ 2002-10-04  6:32   ` Pekka Savola
  2002-10-05 18:33     ` YOSHIFUJI Hideaki / 吉藤英明
  0 siblings, 1 reply; 27+ messages in thread
From: Pekka Savola @ 2002-10-04  6:32 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev, usagi

There seems to be __constant_htonl there too but this is just a nit, and 
shouldn't be a showstopper in the review.

A few comments, mainly on the spec perspective below.

Are IPv4 addresses represented as mapped addresses (as they should by the 
spec at least)?

There seem to be some points at section 4 of the draft (e.g. for multicast
destinations, MUST only pick addresses on the outgoing interface) that may
be missing?

> +#ifndef IPV6_ADDR_MC_SCOPE
> +#define IPV6_ADDR_MC_SCOPE(a)	\
> +	((a)->s6_addr[1] & 0x0f)	/* XXX nonstandard */
> +#define __IPV6_ADDR_SCOPE_RESERVED	-2
> +#define __IPV6_ADDR_SCOPE_ANY		-1
> +#define IPV6_ADDR_SCOPE_NODELOCAL	0x01
> +#define IPV6_ADDR_SCOPE_LINKLOCAL	0x02
> +#define IPV6_ADDR_SCOPE_SITELOCAL	0x05
> +#define IPV6_ADDR_SCOPE_ORGLOCAL	0x08
> +#define IPV6_ADDR_SCOPE_GLOBAL		0x0e
> +#endif

Aren't these definitions header file material, perhaps (I'd guess they 
might be useful in other .c files too).

> +int ipv6_addrselect_scope(const struct in6_addr *addr)
> +{
> +	u32 st;
> +
> +	st = addr->s6_addr32[0];
> +
> +	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
> +	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
> +		return IPV6_ADDR_SCOPE_GLOBAL;
> +
> +	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000))
> +		return IPV6_ADDR_MC_SCOPE(addr);
> +        
> +	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
> +		return IPV6_ADDR_SCOPE_LINKLOCAL;
> +
> +	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
> +		return IPV6_ADDR_SCOPE_SITELOCAL;

Something similar to this is done in addrconf.c:ipv6_addr_type, could 
there be more reuse?

> +	if ((st | addr->s6_addr32[1]) == 0) {
> +		if (addr->s6_addr32[2] == 0) {
> +			if (addr->s6_addr32[3] == 0)
> +				return __IPV6_ADDR_SCOPE_ANY;
> +
> +			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
> +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.4 */
> +
> +			return IPV6_ADDR_SCOPE_GLOBAL;			/* section 2.3 */
> +		}

You're referring to sections 3.4 and 3.3, I think (similar in other 
comments)

> +		if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) {
> +			if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000))
> +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */

Shouldn't that be 0xA9FE0000 if you mean IPv4 zeroconf 169.254.0.0/16 ?
(that could be spelt out in a comment.)

> +			if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) {
> +				if (addr->s6_addr32[3] == __constant_htonl(0xAC100000))
> +					return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */

172.16.0.0 -- 172.31.255.255, not just 172.16.*.*

> +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
> +			}

I don't understand this, this was possibly supposed to be the case for 
127.0.0.0/8 which should be treated as link-local?

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-10-04  6:32   ` Pekka Savola
@ 2002-10-05 18:33     ` YOSHIFUJI Hideaki / 吉藤英明
  2002-10-10 14:29       ` Pekka Savola
  0 siblings, 1 reply; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-05 18:33 UTC (permalink / raw)
  To: pekkas; +Cc: netdev, usagi

Thank you for your comments.

In article <Pine.LNX.4.44.0210040902010.16205-100000@netcore.fi> (at Fri, 4 Oct 2002 09:32:33 +0300 (EEST)), Pekka Savola <pekkas@netcore.fi> says:

> Are IPv4 addresses represented as mapped addresses (as they should by the 
> spec at least)?

see below.


> There seem to be some points at section 4 of the draft (e.g. for multicast
> destinations, MUST only pick addresses on the outgoing interface) that may
> be missing?

fixed.


> > +#ifndef IPV6_ADDR_MC_SCOPE
> > +#define IPV6_ADDR_MC_SCOPE(a)	\
> > +	((a)->s6_addr[1] & 0x0f)	/* XXX nonstandard */
:

> Aren't these definitions header file material, perhaps (I'd guess they 
> might be useful in other .c files too).

I thought that we would do it later, but anyway,
moved to include/net/ipv6.h.


> > +int ipv6_addrselect_scope(const struct in6_addr *addr)
:
> Something similar to this is done in addrconf.c:ipv6_addr_type, could 
> there be more reuse?

integrated core of the code to ipv6_addr_type().


> > +			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
> > +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.4 */
> > +
> > +			return IPV6_ADDR_SCOPE_GLOBAL;			/* section 2.3 */
> > +		}
> 
> You're referring to sections 3.4 and 3.3, I think (similar in other 
> comments)

fixed.


> > +		if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) {
> > +			if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000))
> > +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
> 
> Shouldn't that be 0xA9FE0000 if you mean IPv4 zeroconf 169.254.0.0/16 ?
> (that could be spelt out in a comment.)
> 
> > +			if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) {
> > +				if (addr->s6_addr32[3] == __constant_htonl(0xAC100000))
> > +					return IPV6_ADDR_SCOPE_SITELOCAL;	/* section 2.2 */
> 
> 172.16.0.0 -- 172.31.255.255, not just 172.16.*.*
> 
> > +				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* section 2.2 */
> > +			}
> 
> I don't understand this, this was possibly supposed to be the case for 
> 127.0.0.0/8 which should be treated as link-local?

What stupid code I wrote... And,.. I reread the spec and found that 
ipv4-mapped addresses are global scope for source address selection.
So..., I removed the above code.

Well,

Following patch is against linux-2.4.19.

BTW, "IPv6: Miscellaneous clean-ups" (FIX_2_4_19_MISC_CLEANUPS-20020912) and 
this patch conflict.  What kind of patch do you prefer?
 1. patch on top of plain kernel
 2. patch on top of other-patched kernel
 3. patch with other patch (which conflicts) on top of plain kernel

Thank you in advance.

Index: include/net/addrconf.h
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.1
diff -u -r1.1.1.1 -r1.1.1.1.6.1
--- include/net/addrconf.h	2002/08/20 09:46:45	1.1.1.1
+++ include/net/addrconf.h	2002/09/26 19:15:15	1.1.1.1.6.1
@@ -55,6 +55,9 @@
 					      struct net_device *dev);
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev);
+extern int			ipv6_dev_get_saddr(struct net_device *ddev,
+						   struct in6_addr *daddr,
+						   struct in6_addr *saddr);
 extern int			ipv6_get_saddr(struct dst_entry *dst, 
 					       struct in6_addr *daddr,
 					       struct in6_addr *saddr);
Index: include/net/ipv6.h
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/ipv6.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 ipv6.h
--- include/net/ipv6.h	2002/08/20 09:46:45	1.1.1.1
+++ include/net/ipv6.h	2002/10/05 17:43:48
@@ -74,6 +74,20 @@
 #define IPV6_ADDR_RESERVED	0x2000U	/* reserved address space */
 
 /*
+ * 	Addr scopes
+ */
+#ifdef __KERNEL__
+#define IPV6_ADDR_MC_SCOPE(a)   \
+        ((a)->s6_addr[1] & 0x0f)        /* XXX nonstandard */
+#define __IPV6_ADDR_SCOPE_INVALID	-1
+#endif
+#define IPV6_ADDR_SCOPE_NODELOCAL       0x01
+#define IPV6_ADDR_SCOPE_LINKLOCAL       0x02
+#define IPV6_ADDR_SCOPE_SITELOCAL       0x05
+#define IPV6_ADDR_SCOPE_ORGLOCAL        0x08
+#define IPV6_ADDR_SCOPE_GLOBAL          0x0e
+
+/*
  *	fragmentation header
  */
 
@@ -203,12 +217,28 @@
 					   char *,
 					   unsigned int, unsigned int);
 
-
-extern int		ipv6_addr_type(struct in6_addr *addr);
+/*
+ *	Address manipulation functions
+ */
+extern int		__ipv6_addr_type(struct in6_addr *addr);
+static inline		int ipv6_addr_type(struct in6_addr *addr)
+{
+	return __ipv6_addr_type(addr) & 0xffff;
+}
 
 static inline int ipv6_addr_scope(struct in6_addr *addr)
+{
+	return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK;
+}
+
+static inline int __ipv6_addr_src_scope(int type)
+{
+	return type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : type>>16;
+}
+
+static inline int ipv6_addr_src_scope(struct in6_addr *addr)
 {
-	return ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK;
+	return __ipv6_addr_src_scope(__ipv6_addr_type(addr));
 }
 
 static inline int ipv6_addr_cmp(struct in6_addr *a1, struct in6_addr *a2)
Index: net/ipv6/addrconf.c
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.16
diff -u -r1.1.1.1 -r1.1.1.1.6.16
--- net/ipv6/addrconf.c	2002/08/20 09:47:02	1.1.1.1
+++ net/ipv6/addrconf.c	2002/10/05 17:26:27	1.1.1.1.6.16
@@ -26,6 +26,10 @@
  *						packets.
  *	yoshfuji@USAGI			:       Fixed interval between DAD
  *						packets.
+ *	YOSHIFUJI Hideaki @USAGI	:	improved source address
+ *						selection; consider scope,
+ *						status etc.
+ *
  */
 
 #include <linux/config.h>
@@ -104,6 +108,8 @@
 
 static struct notifier_block *inet6addr_chain;
 
+static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, int ifindex);
+
 struct ipv6_devconf ipv6_devconf =
 {
 	0,				/* forwarding		*/
@@ -132,7 +138,7 @@
 	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
 };
 
-int ipv6_addr_type(struct in6_addr *addr)
+int __ipv6_addr_type(struct in6_addr *addr)
 {
 	u32 st;
 
@@ -143,32 +149,38 @@
 	 */
 	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
 	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
-		return IPV6_ADDR_UNICAST;
+		return (IPV6_ADDR_UNICAST | 
+			IPV6_ADDR_SCOPE_GLOBAL<<16);
 
 	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) {
-		int type = IPV6_ADDR_MULTICAST;
+		/* multicast */
+		/* addr-select 3.1 */
+		int type = IPV6_ADDR_MC_SCOPE(addr)<<16;
 
-		switch((st & __constant_htonl(0x00FF0000))) {
-			case __constant_htonl(0x00010000):
+		switch(type) {
+			case IPV6_ADDR_SCOPE_NODELOCAL<<16:
 				type |= IPV6_ADDR_LOOPBACK;
 				break;
 
-			case __constant_htonl(0x00020000):
+			case IPV6_ADDR_SCOPE_LINKLOCAL<<16:
 				type |= IPV6_ADDR_LINKLOCAL;
 				break;
 
-			case __constant_htonl(0x00050000):
+			case IPV6_ADDR_SCOPE_SITELOCAL<<16:
 				type |= IPV6_ADDR_SITELOCAL;
 				break;
 		};
+		type |= IPV6_ADDR_MULTICAST;
 		return type;
 	}
 	
 	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
-		return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST);
+		return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | 
+			IPV6_ADDR_SCOPE_LINKLOCAL<<16);		/* addr-select 3.1 */
 
 	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
-		return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST);
+		return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_SITELOCAL<<16);		/* addr-select 3.1 */
 
 	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
 		if (addr->s6_addr32[2] == 0) {
@@ -176,18 +188,52 @@
 				return IPV6_ADDR_ANY;
 
 			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
-				return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST);
+				return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST |
+					IPV6_ADDR_SCOPE_LINKLOCAL<<16);	/* addr-select 3.4 */
 
-			return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST);
+			return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST |
+				IPV6_ADDR_SCOPE_GLOBAL<<16);	/* addr-select 3.3 */
 		}
 
 		if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff))
-			return IPV6_ADDR_MAPPED;
+			return (IPV6_ADDR_MAPPED | 
+				IPV6_ADDR_SCOPE_GLOBAL<<16);	/* addr-select 3.3 */
 	}
 
-	return IPV6_ADDR_RESERVED;
+	return (IPV6_ADDR_RESERVED | 
+		IPV6_ADDR_SCOPE_GLOBAL<<16);	/* addr-select 3.4 */
 }
 
+/* find 1st bit in difference between the 2 addrs */
+static inline int addr_diff(const void *__a1, const void *__a2, int addrlen)
+{
+	/* find 1st bit in difference between the 2 addrs.
+	 * bit may be an invalid value,
+	 * but if it is >= plen, the value is ignored in any case.
+	 */
+	const u32 *a1 = __a1;
+	const u32 *a2 = __a2;
+	int i;
+
+	addrlen >>= 2;
+	for (i = 0; i < addrlen; i++) {
+		u32 xb = a1[i] ^ a2[i];
+		if (xb) {
+			int j = 31;
+			xb = ntohl(xb);
+			while ((xb & (1 << j)) == 0)
+				j--;
+			return (i * 32 + 31 - j);
+		}
+	}
+	return addrlen<<5;
+}
+
+static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	 return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr));
+}
+
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 {
 	if (del_timer(&ifp->timer))
@@ -449,122 +495,189 @@
 
 /*
  *	Choose an apropriate source address
- *	should do:
- *	i)	get an address with an apropriate scope
- *	ii)	see if there is a specific route for the destination and use
- *		an address of the attached interface 
- *	iii)	don't use deprecated addresses
+ *	draft-ietf-ipv6-default-addr-select-09.txt
  */
-int ipv6_get_saddr(struct dst_entry *dst,
-		   struct in6_addr *daddr, struct in6_addr *saddr)
+#define IPV6_SADDRSELECT_SELF		0x01
+#define IPV6_SADDRSELECT_PREFERRED	0x02
+#define IPV6_SADDRSELECT_HOME		0x04
+#define IPV6_SADDRSELECT_PUBLIC		0x08
+#define IPV6_SADDRSELECT_INTERFACE	0x10
+#define IPV6_SADDRSELECT_LABEL		0x20
+
+struct addrselect_attrs {
+	struct inet6_ifaddr *ifp;
+	u16	flags;
+	s16	matchlen;
+	u8	scope;
+};
+
+int ipv6_dev_get_saddr(struct net_device *daddr_dev,
+		       struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	int scope;
-	struct inet6_ifaddr *ifp = NULL;
-	struct inet6_ifaddr *match = NULL;
-	struct net_device *dev = NULL;
+	int daddr_type, daddr_scope;
+	u32 daddr_label;
+	struct inet6_ifaddr *ifp0, *ifp = NULL;
+	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct rt6_info *rt;
+
 	int err;
+	int update;
+	struct addrselect_attrs candidate = {NULL,0,0};
 
-	rt = (struct rt6_info *) dst;
-	if (rt)
-		dev = rt->rt6i_dev;
-
-	scope = ipv6_addr_scope(daddr);
-	if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
-		/*
-		 *	route for the "all destinations on link" rule
-		 *	when no routers are present
-		 */
-		scope = IFA_LINK;
-	}
+	daddr_type = __ipv6_addr_type(daddr);
+	daddr_scope = __ipv6_addr_src_scope(daddr_type);
+	daddr_label = ipv6_addrselect_label_lookup(daddr, 
+						   daddr_dev?daddr_dev->ifindex:0);
 
-	/*
-	 *	known dev
-	 *	search dev and walk through dev addresses
-	 */
+	read_lock(&dev_base_lock);
+	read_lock(&addrconf_lock);
+	for (dev = dev_base; dev; dev=dev->next) {
+		idev = __in6_dev_get(dev);
 
-	if (dev) {
-		if (dev->flags & IFF_LOOPBACK)
-			scope = IFA_HOST;
+		if (!idev)
+			continue;
 
-		read_lock(&addrconf_lock);
-		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						read_unlock(&addrconf_lock);
-						goto out;
-					}
-
-					if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
+		/* Rule 0: Candidate Source Address (section 4)
+		 *  - multicast and link-local destination address,
+		 *    the set of candidate source address MUST only
+		 *    include addresses assigned to interfaces
+		 *    belonging to the same link as the outgoing
+		 *    interface.
+		 * (- For site-local destination addresses, the
+		 *    set of candidate source addresses MUST only
+		 *    include addresses assigned to interfaces
+		 *    belonging to the same site as the outgoing
+		 *    interface.)
+		 */
+		if ((daddr_type&IPV6_ADDR_MULTICAST ||
+		     daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
+		    daddr_dev && dev != daddr_dev)
+			continue;
+
+		read_lock_bh(&idev->lock);
+		ifp0 = idev->addr_list;
+		for (ifp=ifp0; ifp; ifp=ifp->if_next) {
+			struct addrselect_attrs temp = {NULL,0,0};
+			int addr_type;
+			update = 0;
+
+			/* Rule 0: Candidate Source Address (section 4)
+			 *  - In any case, anycast addresses, multicast
+			 *    addresses, and the unspecified address MUST
+			 *    NOT be included in a candidate set.
+			 */
+			addr_type = __ipv6_addr_type(&ifp->addr);
+			if (addr_type == IPV6_ADDR_ANY ||
+			    addr_type&IPV6_ADDR_MULTICAST)
+				continue;
+
+			/* Rule 1: Prefer same address */
+			if (ipv6_addr_cmp(&ifp->addr, daddr) == 0)
+				temp.flags |= IPV6_SADDRSELECT_SELF;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_SELF;
+				update = (temp.flags&IPV6_SADDRSELECT_SELF) -
+					 (candidate.flags&IPV6_SADDRSELECT_SELF);
+			if (update < 0) {
+				continue;
+			}
+
+			/* Rule 2: Prefer appropriate scope */
+			temp.scope = __ipv6_addr_src_scope(addr_type);
+			if (!update) {
+				update = temp.scope - candidate.scope;
+				if (update > 0) {
+					update = candidate.scope < daddr_scope ? 1 : -1;
+				} else if (update < 0) {
+					update = temp.scope < daddr_scope ? -1 : 1;
 				}
 			}
-			read_unlock_bh(&idev->lock);
-		}
-		read_unlock(&addrconf_lock);
-	}
+			if (update < 0) {
+				continue;
+			}
 
-	if (scope == IFA_LINK)
-		goto out;
+			/* Rule 3: Avoid deprecated address */
+			if (!(ifp->flags & IFA_F_DEPRECATED))
+				temp.flags |= IPV6_SADDRSELECT_PREFERRED;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_PREFERRED;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_PREFERRED) -
+					 (candidate.flags&IPV6_SADDRSELECT_PREFERRED);
+			if (update < 0) {
+				continue;
+			}
 
-	/*
-	 *	dev == NULL or search failed for specified dev
-	 */
+			/* XXX: Rule 4: Prefer home address */
 
-	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
-	for (dev = dev_base; dev; dev=dev->next) {
-		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
-						read_unlock_bh(&idev->lock);
-						goto out_unlock_base;
-					}
-
-					if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
-				}
+			/* Rule 5: Prefer outgoing interface */
+			if (daddr_dev == NULL || ifp->idev == NULL ||
+			    daddr_dev == ifp->idev->dev)
+				temp.flags |= IPV6_SADDRSELECT_INTERFACE;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_INTERFACE;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_INTERFACE) -
+					 (candidate.flags&IPV6_SADDRSELECT_INTERFACE);
+			if (update < 0) {
+				continue;
+			}
+
+			/* XXX: Rule 6: Prefer matching label */
+			if (ipv6_addrselect_label_lookup(&ifp->addr, dev->ifindex) == daddr_label)
+				temp.flags |= IPV6_SADDRSELECT_LABEL;
+			else
+				temp.flags &= ~IPV6_SADDRSELECT_LABEL;
+			if (!update)
+				update = (temp.flags&IPV6_SADDRSELECT_LABEL) -
+					 (candidate.flags&IPV6_SADDRSELECT_LABEL);
+			if (update < 0) {
+				continue;
+			}
+
+			/* XXX: Rule 7: Prefer public address */
+
+			/* Rule 8: Use longest matching prefix */
+			temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr);
+			if (!update)
+				update = temp.matchlen - candidate.matchlen;
+			if (update < 0) {
+				continue;
 			}
-			read_unlock_bh(&idev->lock);
+
+			/* Final Rule */
+			if (update <= 0)
+				continue;
+
+			/* update candidate */
+			temp.ifp = ifp;
+			in6_ifa_hold(ifp);
+			if (candidate.ifp)
+				in6_ifa_put(candidate.ifp);
+			candidate = temp;
 		}
+		read_unlock_bh(&idev->lock);
 	}
-
-out_unlock_base:
 	read_unlock(&addrconf_lock);
 	read_unlock(&dev_base_lock);
-
-out:
-	if (ifp == NULL) {
-		ifp = match;
-		match = NULL;
-	}
 
-	err = -EADDRNOTAVAIL;
-	if (ifp) {
-		ipv6_addr_copy(saddr, &ifp->addr);
+	if (candidate.ifp) {
+		ipv6_addr_copy(saddr, &candidate.ifp->addr);
+		in6_ifa_put(candidate.ifp);
 		err = 0;
-		in6_ifa_put(ifp);
+	} else {
+		err = -EADDRNOTAVAIL;
 	}
-	if (match)
-		in6_ifa_put(match);
-
 	return err;
 }
 
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_dev : NULL,
+				  daddr, saddr);
+}
+
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
@@ -636,6 +749,69 @@
 	read_unlock_bh(&addrconf_hash_lock);
 
 	return ifp;
+}
+
+/* address selection: default policy label */
+/* XXX: user level configuration */
+static struct ipv6_addrselect_label {
+	struct in6_addr addr;
+	u16	plen;
+	u32	ifindex;
+	u32	label;
+} ipv6_addrselect_label_table[] = {
+	/* ::1/128, label = 0 */
+	{
+		.addr = {{{ [15] = 1 }}},
+		.plen = 128,
+		.label = 0,
+	},
+	/* ::/0, label = 1 */
+	{
+		.plen = 0,
+		.label = 1,
+	},
+	/* 2002::/16, label = 2 */
+	{
+		.addr = {{{ 0x20, 0x02 }}},
+		.plen = 16,
+		.label = 2,
+	},
+	/* ::/96, label = 3 */
+	{
+		.plen = 96,
+		.label = 3,
+	},
+	/* ::ffff:0:0/96, label = 4 */
+	{
+		.addr = {{{ [10] = 0xff, [11] = 0xff }}},
+		.plen = 96,
+		.label = 4,
+	},
+	/* sentinel */
+	{
+		.label = 0xffffffff,
+	}
+};
+
+static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, 
+					int ifindex)
+{
+	struct ipv6_addrselect_label *p;
+	int plen, matchlen = -1;
+	u32 label = 0xffffffff;
+
+	for (p = ipv6_addrselect_label_table;
+	     p->label != 0xffffffff;
+	     p++) {
+		if (ifindex && p->ifindex && ifindex != p->ifindex)
+			continue;
+		plen = ipv6_addr_diff(addr, &p->addr);
+		if (plen < p->plen || plen < matchlen)
+			continue;
+		matchlen = plen;
+		label = p->label;
+	}
+	return label;
 }
 
 /* Gets referenced address, destroys ifaddr */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-10-05 18:33     ` YOSHIFUJI Hideaki / 吉藤英明
@ 2002-10-10 14:29       ` Pekka Savola
  2002-10-10 15:23         ` YOSHIFUJI Hideaki / 吉藤英明
  0 siblings, 1 reply; 27+ messages in thread
From: Pekka Savola @ 2002-10-10 14:29 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev, usagi

On Sun, 6 Oct 2002, YOSHIFUJI Hideaki / 吉藤英明 wrote:

Dave, Alexey.. I think there has to be a high-level decision on how to
proceed here.  I'm referring to the optimization.  TCP or other
connection-oriented protocols use this one per connection; UDP and the
like probably once per packet.  The latter is at least quite undesirable,
as you pointed out.

The question is how one can proceed here: what kind of caching or other
type of approach should be taken.

Putting the stuff in the routing table could work, but then this algorithm
would have to be re-run always when there are changes in any address in
the node.  There might be other ways.

-- 
Pekka Savola                 "Tell me of difficulties surmounted,
Netcore Oy                   not those you stumble over and fall"
Systems. Networks. Security.  -- Robert Jordan: A Crown of Swords

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] IPv6: Improvement of Source Address Selection
  2002-10-10 14:29       ` Pekka Savola
@ 2002-10-10 15:23         ` YOSHIFUJI Hideaki / 吉藤英明
  0 siblings, 0 replies; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-10 15:23 UTC (permalink / raw)
  To: pekkas; +Cc: netdev, usagi

In article <Pine.LNX.4.44.0210101724030.9287-100000@netcore.fi> (at Thu, 10 Oct 2002 17:29:38 +0300 (EEST)), Pekka Savola <pekkas@netcore.fi> says:

> Dave, Alexey.. I think there has to be a high-level decision on how to
> proceed here.  I'm referring to the optimization.  TCP or other
> connection-oriented protocols use this one per connection; UDP and the
> like probably once per packet.  The latter is at least quite undesirable,
> as you pointed out.

Hmm, what we think is that performance-critical applications such as DVTS
(Digital Video Transport System) will do bind(2), so the latter is not a
fatal problem.


We need improved source address selection for further feature(s)
(like privacy extensions).

Our code makes it easy to implement further rules, and the order of the
calculation cost is the same as before: O(n), where n is the number of addresses.

We think netdev and/or usagi can develop optimization later,
and we think people can survive until then.


> Putting the stuff in the routing table could work, but then this algorithm
> would have to be re-run always when there are changes in any address in
> the node.  There might be other ways.

How about
 - store one (daddr,ddev,saddr,tstamp) set in sk
 - update addrconf_tstamp in addrconf_verify() (etc.)
 - check tstamp and addrconf_tstamp and use saddr if ok in saddr selection

--yoshfuji

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2002-10-10 15:23 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明
2002-09-27 16:02 ` kuznet
2002-09-27 16:28   ` Pekka Savola
2002-09-27 16:55     ` kuznet
2002-09-28  1:28 ` David S. Miller
2002-09-28  2:28   ` kuznet
2002-09-28  2:34     ` Andi Kleen
2002-09-28  2:35     ` David S. Miller
2002-09-28  2:58       ` kuznet
2002-09-28  2:55         ` David S. Miller
2002-09-28  3:38           ` kuznet
2002-09-28  3:36             ` David S. Miller
2002-09-28  4:19               ` kuznet
2002-09-28  4:30                 ` YOSHIFUJI Hideaki / 吉藤英明
2002-09-28  4:44                   ` kuznet
2002-09-28  5:14                     ` YOSHIFUJI Hideaki / 吉藤英明
2002-09-28  5:26                       ` kuznet
2002-09-28  4:35                 ` Pekka Savola
2002-09-28  5:00                   ` kuznet
2002-09-28  5:24                     ` Pekka Savola
2002-09-28  5:37                       ` kuznet
2002-09-29  8:41                         ` Pekka Savola
2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明
2002-10-04  6:32   ` Pekka Savola
2002-10-05 18:33     ` YOSHIFUJI Hideaki / 吉藤英明
2002-10-10 14:29       ` Pekka Savola
2002-10-10 15:23         ` YOSHIFUJI Hideaki / 吉藤英明

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).