* [PATCH] IPv6: Improvement of Source Address Selection
@ 2002-09-27 15:17 YOSHIFUJI Hideaki / 吉藤英明
2002-09-27 16:02 ` kuznet
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-27 15:17 UTC (permalink / raw)
To: linux-kernel, netdev; +Cc: usagi
Hello!
This patch implements the standard default source address selection
algorithm. It takes the address status and the address/prefix itself (prefer
same address, prefer longest matching prefix) into consideration.
Note: Even though label matching is not implemented yet,
this is better than the current one.
The following patch is against linux-2.4.19.
Thank you in advance.
-------------------------------------------------------------------
Patch-Name: Improvement of Source Address Selection
Patch-Id: FIX_2_4_19_SADDRSELECT-20020906
Patch-Author: YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
Credit: YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
Reference: draft-ietf-ipv6-default-addr-select-09.txt
-------------------------------------------------------------------
Index: include/net/addrconf.h
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.1
diff -u -r1.1.1.1 -r1.1.1.1.6.1
--- include/net/addrconf.h 2002/08/20 09:46:45 1.1.1.1
+++ include/net/addrconf.h 2002/09/26 19:15:15 1.1.1.1.6.1
@@ -55,6 +55,9 @@
struct net_device *dev);
extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr,
struct net_device *dev);
+extern int ipv6_dev_get_saddr(struct net_device *daddr_dev,	/* outgoing device; NULL if unknown */
+			      struct in6_addr *daddr,		/* destination address */
+			      struct in6_addr *saddr);		/* out: chosen source address */
extern int ipv6_get_saddr(struct dst_entry *dst,
struct in6_addr *daddr,
struct in6_addr *saddr);
Index: net/ipv6/addrconf.c
===================================================================
RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.6.4
diff -u -r1.1.1.1 -r1.1.1.1.6.4
--- net/ipv6/addrconf.c 2002/08/20 09:47:02 1.1.1.1
+++ net/ipv6/addrconf.c 2002/09/26 19:28:13 1.1.1.1.6.4
@@ -26,6 +26,10 @@
* packets.
* yoshfuji@USAGI : Fixed interval between DAD
* packets.
+ * YOSHIFUJI Hideaki @USAGI : improved source address
+ * selection; consider scope,
+ * status etc.
+ *
*/
#include <linux/config.h>
@@ -188,6 +192,99 @@
return IPV6_ADDR_RESERVED;
}
+#ifndef IPV6_ADDR_MC_SCOPE /* everything up to #endif is skipped when this macro already exists */
+#define IPV6_ADDR_MC_SCOPE(a) \
+ ((a)->s6_addr[1] & 0x0f) /* XXX nonstandard; low 4 bits of the address's second byte */
+#define __IPV6_ADDR_SCOPE_RESERVED -2 /* ipv6_addrselect_scope(): no rule matched the address */
+#define __IPV6_ADDR_SCOPE_ANY -1 /* ipv6_addrselect_scope(): all-zero address */
+#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 /* scope values share the value space of IPV6_ADDR_MC_SCOPE() */
+#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02
+#define IPV6_ADDR_SCOPE_SITELOCAL 0x05
+#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08
+#define IPV6_ADDR_SCOPE_GLOBAL 0x0e
+#endif
+
+/*
+ * Classify @addr for source address selection.  Returns an
+ * IPV6_ADDR_SCOPE_* value (for multicast, the 4-bit scope field itself),
+ * __IPV6_ADDR_SCOPE_ANY for the all-zero address, or
+ * __IPV6_ADDR_SCOPE_RESERVED when no rule below matches.
+ */
+int ipv6_addrselect_scope(const struct in6_addr *addr)
+{
+	u32 st = addr->s6_addr32[0];
+
+	/* first three address bits are neither 000 nor 111 */
+	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
+	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
+		return IPV6_ADDR_SCOPE_GLOBAL;
+
+	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000))
+		return IPV6_ADDR_MC_SCOPE(addr);	/* ff00::/8 */
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
+		return IPV6_ADDR_SCOPE_LINKLOCAL;	/* fe80::/10 */
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
+		return IPV6_ADDR_SCOPE_SITELOCAL;	/* fec0::/10 */
+
+	if ((st | addr->s6_addr32[1]) == 0) {
+		u32 v4 = addr->s6_addr32[3];	/* last 32 bits, network byte order */
+
+		if (addr->s6_addr32[2] == 0) {
+			if (v4 == 0)
+				return __IPV6_ADDR_SCOPE_ANY;	/* :: */
+			if (v4 == __constant_htonl(0x00000001))
+				return IPV6_ADDR_SCOPE_LINKLOCAL;	/* ::1, section 2.4 */
+
+			return IPV6_ADDR_SCOPE_GLOBAL;	/* ::a.b.c.d, section 2.3 */
+		}
+
+		/* ::ffff:a.b.c.d: test IPv4 prefixes, not single addresses (section 2.2) */
+		if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) {
+			if ((v4 & __constant_htonl(0xFFFF0000)) == __constant_htonl(0xA9FE0000) ||	/* 169.254/16 */
+			    (v4 & __constant_htonl(0xFF000000)) == __constant_htonl(0x7F000000))	/* 127/8 */
+				return IPV6_ADDR_SCOPE_LINKLOCAL;
+			if ((v4 & __constant_htonl(0xFF000000)) == __constant_htonl(0x0A000000) ||	/* 10/8 */
+			    (v4 & __constant_htonl(0xFFF00000)) == __constant_htonl(0xAC100000) ||	/* 172.16/12 */
+			    (v4 & __constant_htonl(0xFFFF0000)) == __constant_htonl(0xC0A80000))	/* 192.168/16 */
+				return IPV6_ADDR_SCOPE_SITELOCAL;
+
+			return IPV6_ADDR_SCOPE_GLOBAL;
+		}
+	}
+
+	return __IPV6_ADDR_SCOPE_RESERVED;
+}
+
+/*
+ * Index of the first bit (0 = first bit of byte 0) in which two buffers differ,
+ * or the number of bits compared if none; only addrlen/4 whole words are read.
+ */
+static inline int addr_diff(const void *__a1, const void *__a2, int addrlen)
+{
+	const u32 *a1 = __a1;
+	const u32 *a2 = __a2;
+	int i;
+
+	addrlen >>= 2;		/* bytes -> 32-bit words */
+	for (i = 0; i < addrlen; i++) {
+		u32 xb = ntohl(a1[i] ^ a2[i]);	/* host order: bit 31 is the word's first bit */
+		int j = 31;
+
+		if (!xb)
+			continue;
+		while ((xb & ((u32)1 << j)) == 0)	/* unsigned shift: 1 << 31 overflows int */
+			j--;
+		return (i * 32 + 31 - j);
+	}
+	return addrlen << 5;	/* no difference: words -> bits */
+}
+
+/* addr_diff() applied to the full length of two IPv6 addresses. */
+static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr));
+}
+
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
{
if (del_timer(&ifp->timer))
@@ -449,120 +546,157 @@
/*
* Choose an apropriate source address
- * should do:
- * i) get an address with an apropriate scope
- * ii) see if there is a specific route for the destination and use
- * an address of the attached interface
- * iii) don't use deprecated addresses
+ * draft-ietf-ipv6-default-addr-select-09.txt
*/
-int ipv6_get_saddr(struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
+struct addrselect_attrs {
+	struct inet6_ifaddr *ifp;	/* the address these attributes describe */
+	int match;			/* rule 1: equals the destination address */
+	int deprecated;			/* rule 3: IFA_F_DEPRECATED bit from ifp->flags */
+	int home;			/* never set here (rule 4 is unimplemented) */
+	int temporary;			/* never set here (presumably meant for rule 7) */
+	int device;			/* rule 5: address is on the outgoing device */
+	int scope;			/* rule 2: ipv6_addrselect_scope() result */
+	int label;			/* rule 6: always 0 for now */
+	int matchlen;			/* rule 8: ipv6_addr_diff() with the destination */
+};
+
+/*
+ * Pick the best source address for @daddr from the addresses of all devices
+ * and copy it to @saddr.  @daddr_dev is the outgoing device, or NULL when it
+ * is not known; rule 5 prefers addresses on it.  Every address is compared
+ * with the best candidate found so far, one rule after another.
+ * Returns 0 on success or -EADDRNOTAVAIL when no address could be chosen.
+ */
+int ipv6_dev_get_saddr(struct net_device *daddr_dev,
+		       struct in6_addr *daddr, struct in6_addr *saddr)
{
- int scope;
- struct inet6_ifaddr *ifp = NULL;
- struct inet6_ifaddr *match = NULL;
- struct net_device *dev = NULL;
+	int daddr_scope;
+	struct inet6_ifaddr *ifp0, *ifp = NULL;
+	struct net_device *dev;
struct inet6_dev *idev;
- struct rt6_info *rt;
- int err;
- rt = (struct rt6_info *) dst;
- if (rt)
- dev = rt->rt6i_dev;
-
- scope = ipv6_addr_scope(daddr);
- if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
- /*
- * route for the "all destinations on link" rule
- * when no routers are present
- */
- scope = IFA_LINK;
- }
-
- /*
- * known dev
- * search dev and walk through dev addresses
- */
+	int err;
+	int update;
+	struct addrselect_attrs candidate = {NULL,0,0,0,0,0,0,0,0};
- if (dev) {
- if (dev->flags & IFF_LOOPBACK)
- scope = IFA_HOST;
+	daddr_scope = ipv6_addrselect_scope(daddr);
- read_lock(&addrconf_lock);
+	read_lock(&dev_base_lock);
+	read_lock(&addrconf_lock);
+	for (dev = dev_base; dev; dev=dev->next) {
idev = __in6_dev_get(dev);
- if (idev) {
- read_lock_bh(&idev->lock);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == scope) {
- if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
- in6_ifa_hold(ifp);
- read_unlock_bh(&idev->lock);
- read_unlock(&addrconf_lock);
- goto out;
- }
-
- if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
- match = ifp;
- in6_ifa_hold(ifp);
- }
+
+		if (!idev)
+			continue;
+
+		read_lock_bh(&idev->lock);
+		ifp0 = idev->addr_list;
+		for (ifp=ifp0; ifp; ifp=ifp->if_next) {
+			struct addrselect_attrs temp = {NULL,0,0,0,0,0,0,0,0};
+
+			/* never use a tentative address; the code being replaced skipped them too */
+			if (ifp->flags & IFA_F_TENTATIVE)
+				continue;
+
+			/*
+			 * The first usable address is accepted as is: judged against the
+			 * zero-filled initial candidate it could lose, e.g. in rule 2.
+			 */
+			update = candidate.ifp ? 0 : 1;
+
+			/* Rule 1: Prefer same address */
+			temp.match = ipv6_addr_cmp(&ifp->addr, daddr) == 0;
+			if (!update)
+				update = temp.match - candidate.match;
+			if (update < 0) {
+				continue;
+			}
+
+			/* Rule 2: Prefer appropriate scope */
+			temp.scope = ipv6_addrselect_scope(&ifp->addr);
+			if (!update) {
+				update = temp.scope - candidate.scope;
+				if (update > 0) {
+					update = candidate.scope < daddr_scope ? 1 : -1;
+				} else if (update < 0) {
+					update = temp.scope < daddr_scope ? -1 : 1;
}
}
- read_unlock_bh(&idev->lock);
- }
- read_unlock(&addrconf_lock);
- }
+			if (update < 0) {
+				continue;
+			}
- if (scope == IFA_LINK)
- goto out;
+			/* Rule 3: Avoid deprecated address */
+			temp.deprecated = ifp->flags & IFA_F_DEPRECATED;
+			if (!update)
+				update = candidate.deprecated - temp.deprecated;
+			if (update < 0) {
+				continue;
+			}
- /*
- * dev == NULL or search failed for specified dev
- */
+			/* XXX: Rule 4: Prefer home address */
- read_lock(&dev_base_lock);
- read_lock(&addrconf_lock);
- for (dev = dev_base; dev; dev=dev->next) {
- idev = __in6_dev_get(dev);
- if (idev) {
- read_lock_bh(&idev->lock);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == scope) {
- if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
- in6_ifa_hold(ifp);
- read_unlock_bh(&idev->lock);
- goto out_unlock_base;
- }
-
- if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
- match = ifp;
- in6_ifa_hold(ifp);
- }
- }
+			/* Rule 5: Prefer outgoing interface */
+			temp.device = daddr_dev ? daddr_dev == (ifp->idev ? ifp->idev->dev : daddr_dev) : 0;
+			if (!update)
+				update = temp.device - candidate.device;
+			if (update < 0) {
+				continue;
+			}
+
+			/* XXX: Rule 6: Prefer matching label */
+			temp.label = 0;
+			if (!update)
+				update = temp.label - candidate.label;
+			if (update < 0) {
+				continue;
}
- read_unlock_bh(&idev->lock);
+
+			/* XXX: Rule 7: Prefer public address */
+
+			/* Rule 8: Use longest matching prefix */
+			temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr);
+			if (!update)
+				update = temp.matchlen - candidate.matchlen;
+			if (update < 0) {
+				continue;
+			}
+
+			/* Final Rule: on a complete tie the current candidate stays */
+			if (update <= 0)
+				continue;
+
+			/* update candidate: take a reference on the new one, drop the old */
+			temp.ifp = ifp;
+			in6_ifa_hold(ifp);
+			if (candidate.ifp)
+				in6_ifa_put(candidate.ifp);
+			candidate = temp;
}
+		read_unlock_bh(&idev->lock);
}
-
-out_unlock_base:
read_unlock(&addrconf_lock);
read_unlock(&dev_base_lock);
-
-out:
- if (ifp == NULL) {
- ifp = match;
- match = NULL;
- }
- err = -EADDRNOTAVAIL;
- if (ifp) {
- ipv6_addr_copy(saddr, &ifp->addr);
+	if (candidate.ifp) {
+		ipv6_addr_copy(saddr, &candidate.ifp->addr);
+		in6_ifa_put(candidate.ifp);
err = 0;
- in6_ifa_put(ifp);
+	} else {
+		err = -EADDRNOTAVAIL;
}
- if (match)
- in6_ifa_put(match);
-
return err;
+}
+
+/*
+ * Select a source address for @daddr; the outgoing device is taken from the
+ * route @dst when one is given, otherwise none is assumed.
+ */
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_dev : NULL,
+				  daddr, saddr);
}
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-27 16:02 ` kuznet 2002-09-27 16:28 ` Pekka Savola 2002-09-28 1:28 ` David S. Miller 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明 2 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-27 16:02 UTC (permalink / raw) To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev Hello! > This patch supports standard default source address selection > algorithm. To all that I remember we had long discussion about this ages ago. I said I hate this. Such complicated selection without caching is _bug_. I see nothing improved since that time, except for the function became even more hairy. :-) Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-27 16:02 ` kuznet @ 2002-09-27 16:28 ` Pekka Savola 2002-09-27 16:55 ` kuznet 0 siblings, 1 reply; 27+ messages in thread From: Pekka Savola @ 2002-09-27 16:28 UTC (permalink / raw) To: kuznet; +Cc: YOSHIFUJI Hideaki / 吉藤英明, netdev On Fri, 27 Sep 2002 kuznet@ms2.inr.ac.ru wrote: > > This patch supports standard default source address selection > > algorithm. > > To all that I remember we had long discussion about this ages ago. > I said I hate this. Such complicated selection without caching is _bug_. > I see nothing improved since that time, except for the function became > even more hairy. :-) But you agree that a new selection is important, I think? I agree that the spec as written (like, each address against every other, iterate N times etc.) seems to be like total crap.. but at least the intent seems to be clear-ish. If caching was implemented I guess it would be triggered by: - address changes - route changes - a maximum lifetime of xx seconds? Caching, if it can be done simply and reasonably seems like a very good idea to me. Btw I think labels are quite an important component of selection rules, as it (similar to longest matching prefix) keeps certain classes of addresses (e.g. 6to4, mapped addresses, compatible etc.) within the label. That's important. User-manageable policy table is of less importance I think. -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. -- Robert Jordan: A Crown of Swords ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-27 16:28 ` Pekka Savola @ 2002-09-27 16:55 ` kuznet 0 siblings, 0 replies; 27+ messages in thread From: kuznet @ 2002-09-27 16:55 UTC (permalink / raw) To: Pekka Savola; +Cc: yoshfuji, netdev Hello! > But you agree that a new selection is important, I think? It is the thing which would be silly to disagree, people want this. :-) I do not want bogus implementation blocking attempts to select address for O(1) time. > If caching was implemented I guess it would be triggered by: It is just cached in routes like IP makes this. There is nothing sophisticated there, the logic is aligned to logic of routing cache. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明 2002-09-27 16:02 ` kuznet @ 2002-09-28 1:28 ` David S. Miller 2002-09-28 2:28 ` kuznet 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明 2 siblings, 1 reply; 27+ messages in thread From: David S. Miller @ 2002-09-28 1:28 UTC (permalink / raw) To: yoshfuji; +Cc: linux-kernel, netdev, usagi, kuznet From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> Date: Sat, 28 Sep 2002 00:17:42 +0900 (JST) Please redesign this structure. +struct addrselect_attrs { + struct inet6_ifaddr *ifp; + int match; + int deprecated; + int home; + int temporary; + int device; + int scope; + int label; + int matchlen; +}; This is much larger than it needs to be. Please replace these "int" binary states with single "u32 flags;" and appropriate bit definitions. This structure sits on the stack, so it is important to be as small as we can easily make it. Otherwise I have no problems with the patch, Alexey? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 1:28 ` David S. Miller @ 2002-09-28 2:28 ` kuznet 2002-09-28 2:34 ` Andi Kleen 2002-09-28 2:35 ` David S. Miller 0 siblings, 2 replies; 27+ messages in thread From: kuznet @ 2002-09-28 2:28 UTC (permalink / raw) To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi Hello! > Otherwise I have no problems with the patch, Alexey? I have... The implementation is bad. Source address must be retrieved from route, not running this elephant function each packet. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 2:28 ` kuznet @ 2002-09-28 2:34 ` Andi Kleen 2002-09-28 2:35 ` David S. Miller 1 sibling, 0 replies; 27+ messages in thread From: Andi Kleen @ 2002-09-28 2:34 UTC (permalink / raw) To: kuznet; +Cc: David S. Miller, yoshfuji, linux-kernel, netdev, usagi On Sat, Sep 28, 2002 at 06:28:29AM +0400, A.N.Kuznetsov wrote: > Hello! > > > Otherwise I have no problems with the patch, Alexey? > > I have... The implementation is bad. Source address must be retrieved > from route, not running this elephant function each packet. So it just needs to be moved into ip_route_output, right? -Andi ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 2:28 ` kuznet 2002-09-28 2:34 ` Andi Kleen @ 2002-09-28 2:35 ` David S. Miller 2002-09-28 2:58 ` kuznet 1 sibling, 1 reply; 27+ messages in thread From: David S. Miller @ 2002-09-28 2:35 UTC (permalink / raw) To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi From: kuznet@ms2.inr.ac.ru Date: Sat, 28 Sep 2002 06:28:29 +0400 (MSD) > Otherwise I have no problems with the patch, Alexey? I have... The implementation is bad. Source address must be retrieved from route, not running this elephant function each packet. This only runs at connect time, and when NULL fl->fl6_src is seen by ip6_build_xmit() (this means RAW, UDP, ICMP which must make these decisions anyways). Is there really so much computation to be saved by moving this to ipv6 route? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 2:35 ` David S. Miller @ 2002-09-28 2:58 ` kuznet 2002-09-28 2:55 ` David S. Miller 0 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-28 2:58 UTC (permalink / raw) To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi Hello! > This only runs at connect time ... and also at ip6_build_xmit(). Connected dgram sockets are marginal. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 2:58 ` kuznet @ 2002-09-28 2:55 ` David S. Miller 2002-09-28 3:38 ` kuznet 0 siblings, 1 reply; 27+ messages in thread From: David S. Miller @ 2002-09-28 2:55 UTC (permalink / raw) To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi From: kuznet@ms2.inr.ac.ru Date: Sat, 28 Sep 2002 06:58:22 +0400 (MSD) > This only runs at connect time ... and also at ip6_build_xmit(). Connected dgram sockets are marginal. I said UDP/RAW. At least believe that I am this smart :-) Point is that current function is not tiny either, so improvement you suggest applies both to current code and code after Yoshi's change. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 2:55 ` David S. Miller @ 2002-09-28 3:38 ` kuznet 2002-09-28 3:36 ` David S. Miller 0 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-28 3:38 UTC (permalink / raw) To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi Hello! > suggest applies both to current code and code after Yoshi's change. This is wrong, unfortunately. The elimination of ipv6_get_saddr() was trivial before this patch (because of independence of preferred source on real destination, only on scope), the corresponding fix was withdrawn from 2.4 only for sake of this feature, pending as a well-known patch. Now I see retransmission of practically the same patch, which was deferred for improvement that time. Citing myself two years younger: > The first priority task is to eliminate address selection function. > > Without this odd feature it was easy and, in fact, address selection > patches forced me to withdraw the solution from kernel, because > it makes these hacks much more difficult. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 3:38 ` kuznet @ 2002-09-28 3:36 ` David S. Miller 2002-09-28 4:19 ` kuznet 0 siblings, 1 reply; 27+ messages in thread From: David S. Miller @ 2002-09-28 3:36 UTC (permalink / raw) To: kuznet; +Cc: yoshfuji, linux-kernel, netdev, usagi From: kuznet@ms2.inr.ac.ru Date: Sat, 28 Sep 2002 07:38:09 +0400 (MSD) Now I see retransmission of practically the same patch, which was deferred for improvement that time. Ok, Yoshi please work with Alexey to put source address selection into the right place and remove ipv6_get_saddr(). Alexey, I still am not clear, this belongs in the output routing logic right? You dance in circles talking about this patch, that patch, but I cannot decode this into an answer to the question of where source address selection belongs. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 3:36 ` David S. Miller @ 2002-09-28 4:19 ` kuznet 2002-09-28 4:30 ` YOSHIFUJI Hideaki / 吉藤英明 2002-09-28 4:35 ` Pekka Savola 0 siblings, 2 replies; 27+ messages in thread From: kuznet @ 2002-09-28 4:19 UTC (permalink / raw) To: David S. Miller; +Cc: yoshfuji, linux-kernel, netdev, usagi Hello! > Alexey, I still am not clear, this belongs in the output routing logic > right? ... > where source address selection belongs. Yes, it naturally belongs to the time when route is created. This is just extending ipv6 routing entry with a field to hold source address and, generally, making the same work as IPv4 does, with all the advantages, particularly capability to select preferred source address via routes set up by admin (RTA_PREFSRC attribute, "src" in "ip route add"). Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 4:19 ` kuznet @ 2002-09-28 4:30 ` YOSHIFUJI Hideaki / 吉藤英明 2002-09-28 4:44 ` kuznet 2002-09-28 4:35 ` Pekka Savola 1 sibling, 1 reply; 27+ messages in thread From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28 4:30 UTC (permalink / raw) To: kuznet; +Cc: davem, linux-kernel, netdev, usagi In article <200209280419.IAA02894@sex.inr.ac.ru> (at Sat, 28 Sep 2002 08:19:29 +0400 (MSD)), kuznet@ms2.inr.ac.ru says: > This is just extending ipv6 routing entry with a field to hold > source address and, generally, making the same work as IPv4 does, > with all the advantages, particularily capability to select preferred > source address via routes set up by admin (RTA_PREFSRC attribute, > "src" in "ip route add"). we need per socket preference. can we do that with this? -- Hideaki YOSHIFUJI @ USAGI Project <yoshfuji@linux-ipv6.org> GPG FP: 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 4:30 ` YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28 4:44 ` kuznet 2002-09-28 5:14 ` YOSHIFUJI Hideaki / 吉藤英明 0 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-28 4:44 UTC (permalink / raw) To: YOSHIFUJI Hideaki / 吉藤英明 Cc: davem, linux-kernel, netdev, usagi Hello! > we need per socket preference. What kind of? Some matching rules loaded to socket by user? Anyway, rules established by a particular client should be separate, it is just a generalization of bind()/IP{V6}_PKTINFO. I am not sure that it is really interesting though. Just now I cannot imagine what user can invent which is not covered by system-wide rules, bind() and IP{V6}_PKTINFO. Well, if you think more hairy scheme is interesting, feel free to implement this. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 4:44 ` kuznet @ 2002-09-28 5:14 ` YOSHIFUJI Hideaki / 吉藤英明 2002-09-28 5:26 ` kuznet 0 siblings, 1 reply; 27+ messages in thread From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28 5:14 UTC (permalink / raw) To: usagi, kuznet; +Cc: davem, linux-kernel, netdev In article <200209280444.IAA02959@sex.inr.ac.ru> (at Sat, 28 Sep 2002 08:44:29 +0400 (MSD)), kuznet@ms2.inr.ac.ru says: > I am not sure that it is really interesting though. Just now I cannot > imagine what user can invent which is not covered by system-wide rules, > bind() and IP{V6}_PKTINFO. Well, if you think more hairy scheme is interesting, > feel free to implement this. we need per application (per socket) interface for privacy extension (public address vs temporary address) and mobile ip (home address vs care-of address). -- yoshfuji ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 5:14 ` YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28 5:26 ` kuznet 0 siblings, 0 replies; 27+ messages in thread From: kuznet @ 2002-09-28 5:26 UTC (permalink / raw) To: YOSHIFUJI Hideaki / 吉藤英明 Cc: usagi, davem, linux-kernel, netdev Hello! > we need per application (per socket) interface > for privacy extension (public address vs temporary address) and > mobile ip (home address vs care-of address). OK. It is natural user-friendly generalization of bind(). I do not see problems. Though, please, explain, to avoid misunderstanding. Let's take the second case for simplicity. Is that true that it is supposed to add to each application a switch "home or care-of"? This sound strange enough, taking into account that only a few of applications have switch sort of -b in openssh despite of age of plain bind() is equal to age of internet. :-) Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 4:19 ` kuznet 2002-09-28 4:30 ` YOSHIFUJI Hideaki / 吉藤英明 @ 2002-09-28 4:35 ` Pekka Savola 2002-09-28 5:00 ` kuznet 1 sibling, 1 reply; 27+ messages in thread From: Pekka Savola @ 2002-09-28 4:35 UTC (permalink / raw) To: kuznet; +Cc: David S. Miller, yoshfuji, linux-kernel, netdev, usagi On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote: > Hello! > > > Alexey, I still am not clear, this belongs in the output routing logic > > right? > ... > > where source address selection belongs. > > Yes, it naturally belongs to the time when route is created. > > This is just extending ipv6 routing entry with a field to hold > source address and, generally, making the same work as IPv4 does, > with all the advantages, particularily capability to select preferred > source address via routes set up by admin (RTA_PREFSRC attribute, > "src" in "ip route add"). Umm.. you sure? Isn't putting this logic to routes an oversimplification? Consider e.g. a dummy host which only have a few address (link-local, site-local, global; the last two /64's) and, basically, a default route (plus of course an interface routes for those /64's). When talking to other subnets within the site (ie. those not on the /64) one would have difficulties parsing the source address from the default route, as there would have to be at least two candidates there. Am I missing something obvious here? Alexey's approach should work in some simpler cases, but maybe not all (stuff that's network prefix -independent like home addresses, privacy addresses etc. would be different). -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. -- Robert Jordan: A Crown of Swords ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 4:35 ` Pekka Savola @ 2002-09-28 5:00 ` kuznet 2002-09-28 5:24 ` Pekka Savola 0 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-28 5:00 UTC (permalink / raw) To: Pekka Savola; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi Hello! > Isn't putting this logic to routes an oversimplification? Hmmm... I believed this logic is more complicated yet. :-) > route, as there would have to be at least two candidates there. ... > Am I missing something obvious here? Yes. You select some one of the candidates eventually, do not you? :-) And when you have some special preference for a subnet you create a route for it. > (stuff that's network prefix -independent I am sorry, I feel I do not understand what you mean. Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 5:00 ` kuznet @ 2002-09-28 5:24 ` Pekka Savola 2002-09-28 5:37 ` kuznet 0 siblings, 1 reply; 27+ messages in thread From: Pekka Savola @ 2002-09-28 5:24 UTC (permalink / raw) To: kuznet; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote: > > route, as there would have to be at least two candidates there. > ... > > Am I missing something obvious here? > > Yes. You select some one of the candidates eventually, do not you? :-) But can there be more candidates for one route, in which case one would run something similar to this algorithm then? Or would you have an already-sorted list of possible candidate addresses for each route in the order of preference? And recalculate always when address changes? Or..? > And when you have some special preference for a subnet you create > a route for it. This is IMO a wrong approach from user's perspective. Perhaps not if the algorithm was run and e.g. additional, temporary "address selection" routes were created by kernel. > > (stuff that's network prefix -independent > > I am sorry, I feel I do not understand what you mean. Hmm.. this depends on the interpretation of the concept above. If the list is refreshed always when addresses change or change state, this could perhaps work.. -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. -- Robert Jordan: A Crown of Swords ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 5:24 ` Pekka Savola @ 2002-09-28 5:37 ` kuznet 2002-09-29 8:41 ` Pekka Savola 0 siblings, 1 reply; 27+ messages in thread From: kuznet @ 2002-09-28 5:37 UTC (permalink / raw) To: Pekka Savola; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi Hello! > Or would you have an already-sorted list of possible candidate addresses > for each route in the order of preference? I am not mad yet. :-) What preference? You must select _one_ address, you do not need lost candidates. > And recalculate always when address changes? What address? Interface address? Routing tables used to be synchronized to this. > This is IMO a wrong approach from user's perspective. Perhaps not if the > algorithm was run and e.g. additional, temporary "address selection" > routes were created by kernel. > > > > (stuff that's network prefix -independent > > > > I am sorry, I feel I do not understand what you mean. > > Hmm.. this depends on the interpretation of the concept above. If the > list is refreshed always when addresses change or change state, this could > perhaps work.. I am afraid I do not understand what "address", "state", "temporary" routes etc you mean. It remained in your brains. :-) Pekka, are you not going to sleep? (I am.) I bet when you reread this tomorrow, you will not blame that my brains eventually falled to "parse error" loop. :-) Alexey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-28 5:37 ` kuznet @ 2002-09-29 8:41 ` Pekka Savola 0 siblings, 0 replies; 27+ messages in thread From: Pekka Savola @ 2002-09-29 8:41 UTC (permalink / raw) To: kuznet; +Cc: davem, yoshfuji, linux-kernel, netdev, usagi On Sat, 28 Sep 2002 kuznet@ms2.inr.ac.ru wrote: > > Or would you have an already-sorted list of possible candidate addresses > > for each route in the order of preference? > > I am not mad yet. :-) > > What preference? You must select _one_ address, you do not need lost > candidates. In the case the first entry goes away, having a list could help being able to the next one to use very easily. But this probably just an implementation detail. > > And recalculate always when address changes? > > What address? Interface address? Routing tables used to be synchronized > to this. Any address. One notable case is that the outgoing interface has only link/site-local addresses and the destination is global. There are other cases too. > > This is IMO a wrong approach from user's perspective. Perhaps not if the > > algorithm was run and e.g. additional, temporary "address selection" > > routes were created by kernel. > > > > > > (stuff that's network prefix -independent > > > > > > I am sorry, I feel I do not understand what you mean. > > > > Hmm.. this depends on the interpretation of the concept above. If the > > list is refreshed always when addresses change or change state, this could > > perhaps work.. > > I am afraid I do not understand what "address", "state", "temporary" routes > etc you mean. It remained in your brains. :-) > > Pekka, are you not going to sleep? (I am.) I bet when you reread this tomorrow, > you will not blame that my brains eventually falled to "parse error" loop. :-) I had already woken up :-). At least BSD and I think Linux create ad-hoc, "cloned" routes e.g. in Path MTU discovery process to hold some different values. I don't remember the details. 
I was wondering if this would be done the same or not. change state = move to deprecated, move to non-deprecated. Hope this clarifies. -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. -- Robert Jordan: A Crown of Swords ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明 2002-09-27 16:02 ` kuznet 2002-09-28 1:28 ` David S. Miller @ 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明 2002-10-04 6:32 ` Pekka Savola 2 siblings, 1 reply; 27+ messages in thread From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-03 16:50 UTC (permalink / raw) To: netdev; +Cc: usagi In article <20020928.001742.125874265.yoshfuji@linux-ipv6.org> (at Sat, 28 Sep 2002 00:17:42 +0900 (JST)), YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> says: > This patch supports standard default source address selection > algorithm. It takes status, address/prefix itself (prefer same address, > prefer longest matching prefix) into consideration. > Note: Even though matching label is not implemented yet, > this is better than current one. > > Following patch is against linux-2.4.19. This patch is revised version. I think we have more things to be done, but anyways, - save memory (comment from devem) - introduced (static) policy label (comment from pekkas) Thanks in advance. 
------ Index: include/net/addrconf.h =================================================================== RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.1.6.1 diff -u -r1.1.1.1 -r1.1.1.1.6.1 --- include/net/addrconf.h 2002/08/20 09:46:45 1.1.1.1 +++ include/net/addrconf.h 2002/09/26 19:15:15 1.1.1.1.6.1 @@ -55,6 +55,9 @@ struct net_device *dev); extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev); +extern int ipv6_dev_get_saddr(struct net_device *ddev, + struct in6_addr *daddr, + struct in6_addr *saddr); extern int ipv6_get_saddr(struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr); Index: net/ipv6/addrconf.c =================================================================== RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.1.6.6 diff -u -r1.1.1.1 -r1.1.1.1.6.6 --- net/ipv6/addrconf.c 2002/08/20 09:47:02 1.1.1.1 +++ net/ipv6/addrconf.c 2002/10/03 03:28:33 1.1.1.1.6.6 @@ -26,6 +26,10 @@ * packets. * yoshfuji@USAGI : Fixed interval between DAD * packets. + * YOSHIFUJI Hideaki @USAGI : improved source address + * selection; consider scope, + * status etc. 
+ * */ #include <linux/config.h> @@ -104,6 +108,8 @@ static struct notifier_block *inet6addr_chain; +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, int ifindex); + struct ipv6_devconf ipv6_devconf = { 0, /* forwarding */ @@ -188,6 +194,99 @@ return IPV6_ADDR_RESERVED; } +#ifndef IPV6_ADDR_MC_SCOPE +#define IPV6_ADDR_MC_SCOPE(a) \ + ((a)->s6_addr[1] & 0x0f) /* XXX nonstandard */ +#define __IPV6_ADDR_SCOPE_RESERVED -2 +#define __IPV6_ADDR_SCOPE_ANY -1 +#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 +#define IPV6_ADDR_SCOPE_GLOBAL 0x0e +#endif + +int ipv6_addrselect_scope(const struct in6_addr *addr) +{ + u32 st; + + st = addr->s6_addr32[0]; + + if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) && + (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000)) + return IPV6_ADDR_SCOPE_GLOBAL; + + if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) + return IPV6_ADDR_MC_SCOPE(addr); + + if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000)) + return IPV6_ADDR_SCOPE_LINKLOCAL; + + if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000)) + return IPV6_ADDR_SCOPE_SITELOCAL; + + if ((st | addr->s6_addr32[1]) == 0) { + if (addr->s6_addr32[2] == 0) { + if (addr->s6_addr32[3] == 0) + return __IPV6_ADDR_SCOPE_ANY; + + if (addr->s6_addr32[3] == __constant_htonl(0x00000001)) + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.4 */ + + return IPV6_ADDR_SCOPE_GLOBAL; /* section 2.3 */ + } + + if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) { + if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000)) + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.2 */ + if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) { + if (addr->s6_addr32[3] == __constant_htonl(0xAC100000)) + return IPV6_ADDR_SCOPE_SITELOCAL; /* section 2.2 */ + + return IPV6_ADDR_SCOPE_LINKLOCAL; /* 
section 2.2 */ + } + if (addr->s6_addr32[3] == __constant_htonl(0x0A000000)) + return IPV6_ADDR_SCOPE_SITELOCAL; /* section 2.2 */ + if (addr->s6_addr32[3] == __constant_htonl(0xC0A80000)) + return IPV6_ADDR_SCOPE_SITELOCAL; /* section 2.2 */ + + return IPV6_ADDR_SCOPE_GLOBAL; /* section 2.2 */ + } + } + + return __IPV6_ADDR_SCOPE_RESERVED; +} + +/* find 1st bit in difference between the 2 addrs */ +static inline int addr_diff(const void *__a1, const void *__a2, int addrlen) +{ + /* find 1st bit in difference between the 2 addrs. + * bit may be an invalid value, + * but if it is >= plen, the value is ignored in any case. + */ + const u32 *a1 = __a1; + const u32 *a2 = __a2; + int i; + + addrlen >>= 2; + for (i = 0; i < addrlen; i++) { + u32 xb = a1[i] ^ a2[i]; + if (xb) { + int j = 31; + xb = ntohl(xb); + while ((xb & (1 << j)) == 0) + j--; + return (i * 32 + 31 - j); + } + } + return addrlen<<5; +} + +static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr)); +} + static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -449,122 +548,160 @@ /* * Choose an apropriate source address - * should do: - * i) get an address with an apropriate scope - * ii) see if there is a specific route for the destination and use - * an address of the attached interface - * iii) don't use deprecated addresses + * draft-ietf-ipngwg-default-addr-select-09.txt */ -int ipv6_get_saddr(struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) +#define IPV6_SADDRSELECT_SELF 0x01 +#define IPV6_SADDRSELECT_PREFERRED 0x02 +#define IPV6_SADDRSELECT_HOME 0x04 +#define IPV6_SADDRSELECT_PUBLIC 0x08 +#define IPV6_SADDRSELECT_INTERFACE 0x10 +#define IPV6_SADDRSELECT_LABEL 0x20 + +struct addrselect_attrs { + struct inet6_ifaddr *ifp; + u16 flags; + s16 matchlen; + u8 scope; +}; + +int ipv6_dev_get_saddr(struct net_device *daddr_dev, + struct in6_addr 
*daddr, struct in6_addr *saddr) { - int scope; - struct inet6_ifaddr *ifp = NULL; - struct inet6_ifaddr *match = NULL; - struct net_device *dev = NULL; + int daddr_scope; + u32 daddr_label; + struct inet6_ifaddr *ifp0, *ifp = NULL; + struct net_device *dev; struct inet6_dev *idev; - struct rt6_info *rt; - int err; - rt = (struct rt6_info *) dst; - if (rt) - dev = rt->rt6i_dev; - - scope = ipv6_addr_scope(daddr); - if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) { - /* - * route for the "all destinations on link" rule - * when no routers are present - */ - scope = IFA_LINK; - } - - /* - * known dev - * search dev and walk through dev addresses - */ + int err; + int update; + struct addrselect_attrs candidate = {NULL,0,0}; - if (dev) { - if (dev->flags & IFF_LOOPBACK) - scope = IFA_HOST; + daddr_scope = ipv6_addrselect_scope(daddr); + daddr_label = ipv6_addrselect_label_lookup(daddr, + daddr_dev?daddr_dev->ifindex:0); - read_lock(&addrconf_lock); + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); + for (dev = dev_base; dev; dev=dev->next) { idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { - in6_ifa_hold(ifp); - read_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - goto out; - } - - if (!match && !(ifp->flags & IFA_F_TENTATIVE)) { - match = ifp; - in6_ifa_hold(ifp); - } + + if (!idev) + continue; + + read_lock_bh(&idev->lock); + ifp0 = idev->addr_list; + for (ifp=ifp0; ifp; ifp=ifp->if_next) { + struct addrselect_attrs temp = {NULL,0,0}; + update = 0; + + /* Rule 1: Prefer same address */ + if (ipv6_addr_cmp(&ifp->addr, daddr) == 0) + temp.flags |= IPV6_SADDRSELECT_SELF; + else + temp.flags &= ~IPV6_SADDRSELECT_SELF; + update = (temp.flags&IPV6_SADDRSELECT_SELF) - + (candidate.flags&IPV6_SADDRSELECT_SELF); + if (update < 0) { + continue; + } + + /* Rule 2: Prefer appropriate scope */ + temp.scope 
= ipv6_addrselect_scope(&ifp->addr); + if (!update) { + update = temp.scope - candidate.scope; + if (update > 0) { + update = candidate.scope < daddr_scope ? 1 : -1; + } else if (update < 0) { + update = temp.scope < daddr_scope ? -1 : 1; } } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - } + if (update < 0) { + continue; + } - if (scope == IFA_LINK) - goto out; + /* Rule 3: Avoid deprecated address */ + if (!(ifp->flags & IFA_F_DEPRECATED)) + temp.flags |= IPV6_SADDRSELECT_PREFERRED; + else + temp.flags &= ~IPV6_SADDRSELECT_PREFERRED; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_PREFERRED) - + (candidate.flags&IPV6_SADDRSELECT_PREFERRED); + if (update < 0) { + continue; + } - /* - * dev == NULL or search failed for specified dev - */ + /* XXX: Rule 4: Prefer home address */ - read_lock(&dev_base_lock); - read_lock(&addrconf_lock); - for (dev = dev_base; dev; dev=dev->next) { - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { - in6_ifa_hold(ifp); - read_unlock_bh(&idev->lock); - goto out_unlock_base; - } - - if (!match && !(ifp->flags&IFA_F_TENTATIVE)) { - match = ifp; - in6_ifa_hold(ifp); - } - } + /* Rule 5: Prefer outgoing interface */ + if (daddr_dev == NULL || ifp->idev == NULL || + daddr_dev == ifp->idev->dev) + temp.flags |= IPV6_SADDRSELECT_INTERFACE; + else + temp.flags &= ~IPV6_SADDRSELECT_INTERFACE; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_INTERFACE) - + (candidate.flags&IPV6_SADDRSELECT_INTERFACE); + if (update < 0) { + continue; } - read_unlock_bh(&idev->lock); + + /* XXX: Rule 6: Prefer matching label */ + if (ipv6_addrselect_label_lookup(&ifp->addr, dev->ifindex) == daddr_label) + temp.flags |= IPV6_SADDRSELECT_LABEL; + else + temp.flags &= ~IPV6_SADDRSELECT_LABEL; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_LABEL) - + 
(candidate.flags&IPV6_SADDRSELECT_LABEL); + if (update < 0) { + continue; + } + + /* XXX: Rule 7: Prefer public address */ + + /* Rule 8: Use longest matching prefix */ + temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr); + if (!update) + update = temp.matchlen - candidate.matchlen; + if (update < 0) { + continue; + } + + /* Final Rule */ + if (update <= 0) + continue; + + /* update candidate */ + temp.ifp = ifp; + in6_ifa_hold(ifp); + if (candidate.ifp) + in6_ifa_put(candidate.ifp); + candidate = temp; } + read_unlock_bh(&idev->lock); } - -out_unlock_base: read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); - -out: - if (ifp == NULL) { - ifp = match; - match = NULL; - } - err = -EADDRNOTAVAIL; - if (ifp) { - ipv6_addr_copy(saddr, &ifp->addr); + if (candidate.ifp) { + ipv6_addr_copy(saddr, &candidate.ifp->addr); + in6_ifa_put(candidate.ifp); err = 0; - in6_ifa_put(ifp); + } else { + err = -EADDRNOTAVAIL; } - if (match) - in6_ifa_put(match); - return err; } +int ipv6_get_saddr(struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + return ipv6_dev_get_saddr(dst ? 
((struct rt6_info *)dst)->rt6i_dev : NULL, + daddr, saddr); +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; @@ -636,6 +773,69 @@ read_unlock_bh(&addrconf_hash_lock); return ifp; +} + +/* address selection: default policy label */ +/* XXX: user level configuration */ +static struct ipv6_addrselect_label { + struct in6_addr addr; + u16 plen; + u32 ifindex; + u32 label; +} ipv6_addrselect_label_table[] = { + /* ::1/128, label = 0 */ + { + .addr = {{{ [15] = 1 }}}, + .plen = 128, + .label = 0, + }, + /* ::/0, label = 1 */ + { + .plen = 0, + .label = 1, + }, + /* 2002::/16, label = 2 */ + { + .addr = {{{ 0x20, 0x02 }}}, + .plen = 16, + .label = 2, + }, + /* ::/96, label = 3 */ + { + .plen = 96, + .label = 3, + }, + /* ::ffff:0:0/96, label = 4 */ + { + .addr = {{{ [10] = 0xff, [11] = 0xff }}}, + .plen = 96, + .label = 4, + }, + /* sentinel */ + { + .label = 0xffffffff, + } +}; + +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, + int ifindex) +{ + struct ipv6_addrselect_label *p; + int plen, matchlen = -1; + u32 label = 0xffffffff; + + for (p = ipv6_addrselect_label_table; + p->label != 0xffffffff; + p++) { + if (ifindex && p->ifindex && ifindex != p->ifindex) + continue; + plen = ipv6_addr_diff(addr, &p->addr); + if (plen < p->plen || plen < matchlen) + continue; + matchlen = plen; + label = p->label; + } + return label; } /* Gets referenced address, destroys ifaddr */ ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-04 6:32 ` Pekka Savola 2002-10-05 18:33 ` YOSHIFUJI Hideaki / 吉藤英明 0 siblings, 1 reply; 27+ messages in thread From: Pekka Savola @ 2002-10-04 6:32 UTC (permalink / raw) To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev, usagi There seems to be __constant_htonl there too but this is just a nit, and shouldn't be a showstopper in the review. A few comments, mainly on the spec perspective below. Are IPv4 addresses represented as mapped addresses (as they should be according to the spec, at least)? There seem to be some points at section 4 of the draft (e.g. for multicast destinations, MUST only pick addresses on the outgoing interface) that may be missing? > +#ifndef IPV6_ADDR_MC_SCOPE > +#define IPV6_ADDR_MC_SCOPE(a) \ > + ((a)->s6_addr[1] & 0x0f) /* XXX nonstandard */ > +#define __IPV6_ADDR_SCOPE_RESERVED -2 > +#define __IPV6_ADDR_SCOPE_ANY -1 > +#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 > +#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 > +#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 > +#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 > +#define IPV6_ADDR_SCOPE_GLOBAL 0x0e > +#endif Aren't these definitions header file material, perhaps (I'd guess they might be useful in other .c files too). 
> +int ipv6_addrselect_scope(const struct in6_addr *addr) > +{ > + u32 st; > + > + st = addr->s6_addr32[0]; > + > + if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) && > + (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000)) > + return IPV6_ADDR_SCOPE_GLOBAL; > + > + if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) > + return IPV6_ADDR_MC_SCOPE(addr); > + > + if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000)) > + return IPV6_ADDR_SCOPE_LINKLOCAL; > + > + if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000)) > + return IPV6_ADDR_SCOPE_SITELOCAL; Something similar to this is done in addrconf.c:ipv6_addr_type, could there be more reuse? > + if ((st | addr->s6_addr32[1]) == 0) { > + if (addr->s6_addr32[2] == 0) { > + if (addr->s6_addr32[3] == 0) > + return __IPV6_ADDR_SCOPE_ANY; > + > + if (addr->s6_addr32[3] == __constant_htonl(0x00000001)) > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.4 */ > + > + return IPV6_ADDR_SCOPE_GLOBAL; /* section 2.3 */ > + } You're referring to sections 3.4 and 3.3, I think (similar in other comments) > + if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) { > + if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000)) > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.2 */ Shouldn't that be 0xA9FE0000 if you mean IPv4 zeroconf 169.254.0.0/16 ? (that could be spelt out in a comment.) > + if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) { > + if (addr->s6_addr32[3] == __constant_htonl(0xAC100000)) > + return IPV6_ADDR_SCOPE_SITELOCAL; /* section 2.2 */ 172.16.00 -- 172.31.255.255, not just 172.16.*.* > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.2 */ > + } I don't understand this, this was possibly supposed to be the case for 127.0.0.0/8 which should be treated as link-local? -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. 
-- Robert Jordan: A Crown of Swords ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-10-04 6:32 ` Pekka Savola @ 2002-10-05 18:33 ` YOSHIFUJI Hideaki / 吉藤英明 2002-10-10 14:29 ` Pekka Savola 0 siblings, 1 reply; 27+ messages in thread From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-05 18:33 UTC (permalink / raw) To: pekkas; +Cc: netdev, usagi Thank you for your comments. In article <Pine.LNX.4.44.0210040902010.16205-100000@netcore.fi> (at Fri, 4 Oct 2002 09:32:33 +0300 (EEST)), Pekka Savola <pekkas@netcore.fi> says: > Are IPv4 addresses represented as mapped addresses (as they should by the > spec at least)? see below. > There seem to be some points at section 4 of the draft (e.g. for multicast > destinations, MUST only pick addresses on the outgoing interface) that may > be missing? fixed. > > +#ifndef IPV6_ADDR_MC_SCOPE > > +#define IPV6_ADDR_MC_SCOPE(a) \ > > + ((a)->s6_addr[1] & 0x0f) /* XXX nonstandard */ : > Aren't these definitions header file material, perhaps (I'd guess they > might be useful in other .c files too). I thought that we would do it later, but anyway, moved to include/net/ipv6.h. > > +int ipv6_addrselect_scope(const struct in6_addr *addr) : > Something similar to this is done in addrconf.c:ipv6_addr_type, could > there be more reuse? integrated core of the code to ipv6_addr_type(). > > + if (addr->s6_addr32[3] == __constant_htonl(0x00000001)) > > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.4 */ > > + > > + return IPV6_ADDR_SCOPE_GLOBAL; /* section 2.3 */ > > + } > > You're referring to sections 3.4 and 3.3, I think (similar in other > comments) fixed. > > + if (addr->s6_addr32[2] == __constant_htonl(0x0000FFFF)) { > > + if (addr->s6_addr32[3] == __constant_htonl(0xA9FF0000)) > > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.2 */ > > Shouldn't that be 0xA9FE0000 if you mean IPv4 zeroconf 169.254.0.0/16 ? > (that could be spelt out in a comment.) 
> > > + if (addr->s6_addr32[3] == __constant_htonl(0xAC000000)) { > > + if (addr->s6_addr32[3] == __constant_htonl(0xAC100000)) > > + return IPV6_ADDR_SCOPE_SITELOCAL; /* section 2.2 */ > > 172.16.00 -- 172.31.255.255, not just 172.16.*.* > > > + return IPV6_ADDR_SCOPE_LINKLOCAL; /* section 2.2 */ > > + } > > I don't understand this, this was possibly supposed to be the case for > 127.0.0.0/8 which should be treated as link-local? How stupid code I wrote... And,.. I reread the spec and found that ipv4-mapped addresses are global scope for source address selection. So..., I removed above codes. Well, Following patch is against linux-2.4.19. BTW, "IPv6: Miscellaneous clean-ups" (FIX_2_4_19_MISC_CLEANUPS-20020912) and this patch conflics. What kind of patch do you prefer? 1. patch on top of plain kernel 2. patch on top of other-patched kernel 3. patch with other patch (which conflicts) on top of plain kernel Thank you in advance. Index: include/net/addrconf.h =================================================================== RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/addrconf.h,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.1.6.1 diff -u -r1.1.1.1 -r1.1.1.1.6.1 --- include/net/addrconf.h 2002/08/20 09:46:45 1.1.1.1 +++ include/net/addrconf.h 2002/09/26 19:15:15 1.1.1.1.6.1 @@ -55,6 +55,9 @@ struct net_device *dev); extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev); +extern int ipv6_dev_get_saddr(struct net_device *ddev, + struct in6_addr *daddr, + struct in6_addr *saddr); extern int ipv6_get_saddr(struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr); Index: include/net/ipv6.h =================================================================== RCS file: /cvsroot/usagi/usagi-backport/linux24/include/net/ipv6.h,v retrieving revision 1.1.1.1 diff -u -r1.1.1.1 ipv6.h --- include/net/ipv6.h 2002/08/20 09:46:45 1.1.1.1 +++ include/net/ipv6.h 2002/10/05 17:43:48 @@ -74,6 +74,20 @@ #define 
IPV6_ADDR_RESERVED 0x2000U /* reserved address space */ /* + * Addr scopes + */ +#ifdef __KERNEL__ +#define IPV6_ADDR_MC_SCOPE(a) \ + ((a)->s6_addr[1] & 0x0f) /* XXX nonstandard */ +#define __IPV6_ADDR_SCOPE_INVALID -1 +#endif +#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 +#define IPV6_ADDR_SCOPE_GLOBAL 0x0e + +/* * fragmentation header */ @@ -203,12 +217,28 @@ char *, unsigned int, unsigned int); - -extern int ipv6_addr_type(struct in6_addr *addr); +/* + * Address manipulation functions + */ +extern int __ipv6_addr_type(struct in6_addr *addr); +static inline int ipv6_addr_type(struct in6_addr *addr) +{ + return __ipv6_addr_type(addr) & 0xffff; +} static inline int ipv6_addr_scope(struct in6_addr *addr) +{ + return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; +} + +static inline int __ipv6_addr_src_scope(int type) +{ + return type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : type>>16; +} + +static inline int ipv6_addr_src_scope(struct in6_addr *addr) { - return ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; + return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } static inline int ipv6_addr_cmp(struct in6_addr *a1, struct in6_addr *a2) Index: net/ipv6/addrconf.c =================================================================== RCS file: /cvsroot/usagi/usagi-backport/linux24/net/ipv6/addrconf.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.1.6.16 diff -u -r1.1.1.1 -r1.1.1.1.6.16 --- net/ipv6/addrconf.c 2002/08/20 09:47:02 1.1.1.1 +++ net/ipv6/addrconf.c 2002/10/05 17:26:27 1.1.1.1.6.16 @@ -26,6 +26,10 @@ * packets. * yoshfuji@USAGI : Fixed interval between DAD * packets. + * YOSHIFUJI Hideaki @USAGI : improved source address + * selection; consider scope, + * status etc. 
+ * */ #include <linux/config.h> @@ -104,6 +108,8 @@ static struct notifier_block *inet6addr_chain; +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, int ifindex); + struct ipv6_devconf ipv6_devconf = { 0, /* forwarding */ @@ -132,7 +138,7 @@ MAX_RTR_SOLICITATION_DELAY, /* rtr solicit delay */ }; -int ipv6_addr_type(struct in6_addr *addr) +int __ipv6_addr_type(struct in6_addr *addr) { u32 st; @@ -143,32 +149,38 @@ */ if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) && (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000)) - return IPV6_ADDR_UNICAST; + return (IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_GLOBAL<<16); if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) { - int type = IPV6_ADDR_MULTICAST; + /* multicast */ + /* addr-select 3.1 */ + int type = IPV6_ADDR_MC_SCOPE(addr)<<16; - switch((st & __constant_htonl(0x00FF0000))) { - case __constant_htonl(0x00010000): + switch(type) { + case IPV6_ADDR_SCOPE_NODELOCAL<<16: type |= IPV6_ADDR_LOOPBACK; break; - case __constant_htonl(0x00020000): + case IPV6_ADDR_SCOPE_LINKLOCAL<<16: type |= IPV6_ADDR_LINKLOCAL; break; - case __constant_htonl(0x00050000): + case IPV6_ADDR_SCOPE_SITELOCAL<<16: type |= IPV6_ADDR_SITELOCAL; break; }; + type |= IPV6_ADDR_MULTICAST; return type; } if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000)) - return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_LINKLOCAL<<16); /* addr-select 3.1 */ if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000)) - return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_SITELOCAL<<16); /* addr-select 3.1 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { @@ -176,18 +188,52 @@ return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == __constant_htonl(0x00000001)) - return (IPV6_ADDR_LOOPBACK | 
IPV6_ADDR_UNICAST); + return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_LINKLOCAL<<16); /* addr-select 3.4 */ - return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff)) - return IPV6_ADDR_MAPPED; + return (IPV6_ADDR_MAPPED | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.3 */ } - return IPV6_ADDR_RESERVED; + return (IPV6_ADDR_RESERVED | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.4 */ } +/* find 1st bit in difference between the 2 addrs */ +static inline int addr_diff(const void *__a1, const void *__a2, int addrlen) +{ + /* find 1st bit in difference between the 2 addrs. + * bit may be an invalid value, + * but if it is >= plen, the value is ignored in any case. + */ + const u32 *a1 = __a1; + const u32 *a2 = __a2; + int i; + + addrlen >>= 2; + for (i = 0; i < addrlen; i++) { + u32 xb = a1[i] ^ a2[i]; + if (xb) { + int j = 31; + xb = ntohl(xb); + while ((xb & (1 << j)) == 0) + j--; + return (i * 32 + 31 - j); + } + } + return addrlen<<5; +} + +static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr)); +} + static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -449,122 +495,189 @@ /* * Choose an apropriate source address - * should do: - * i) get an address with an apropriate scope - * ii) see if there is a specific route for the destination and use - * an address of the attached interface - * iii) don't use deprecated addresses + * draft-ietf-ipv6-default-addr-select-09.txt */ -int ipv6_get_saddr(struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) +#define IPV6_SADDRSELECT_SELF 0x01 +#define IPV6_SADDRSELECT_PREFERRED 0x02 +#define IPV6_SADDRSELECT_HOME 0x04 +#define IPV6_SADDRSELECT_PUBLIC 0x08 +#define IPV6_SADDRSELECT_INTERFACE 0x10 
+#define IPV6_SADDRSELECT_LABEL 0x20 + +struct addrselect_attrs { + struct inet6_ifaddr *ifp; + u16 flags; + s16 matchlen; + u8 scope; +}; + +int ipv6_dev_get_saddr(struct net_device *daddr_dev, + struct in6_addr *daddr, struct in6_addr *saddr) { - int scope; - struct inet6_ifaddr *ifp = NULL; - struct inet6_ifaddr *match = NULL; - struct net_device *dev = NULL; + int daddr_type, daddr_scope; + u32 daddr_label; + struct inet6_ifaddr *ifp0, *ifp = NULL; + struct net_device *dev; struct inet6_dev *idev; - struct rt6_info *rt; + int err; + int update; + struct addrselect_attrs candidate = {NULL,0,0}; - rt = (struct rt6_info *) dst; - if (rt) - dev = rt->rt6i_dev; - - scope = ipv6_addr_scope(daddr); - if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) { - /* - * route for the "all destinations on link" rule - * when no routers are present - */ - scope = IFA_LINK; - } + daddr_type = __ipv6_addr_type(daddr); + daddr_scope = __ipv6_addr_src_scope(daddr_type); + daddr_label = ipv6_addrselect_label_lookup(daddr, + daddr_dev?daddr_dev->ifindex:0); - /* - * known dev - * search dev and walk through dev addresses - */ + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); + for (dev = dev_base; dev; dev=dev->next) { + idev = __in6_dev_get(dev); - if (dev) { - if (dev->flags & IFF_LOOPBACK) - scope = IFA_HOST; + if (!idev) + continue; - read_lock(&addrconf_lock); - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { - in6_ifa_hold(ifp); - read_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - goto out; - } - - if (!match && !(ifp->flags & IFA_F_TENTATIVE)) { - match = ifp; - in6_ifa_hold(ifp); - } + /* Rule 0: Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same 
link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) + */ + if ((daddr_type&IPV6_ADDR_MULTICAST || + daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + daddr_dev && dev != daddr_dev) + continue; + + read_lock_bh(&idev->lock); + ifp0 = idev->addr_list; + for (ifp=ifp0; ifp; ifp=ifp->if_next) { + struct addrselect_attrs temp = {NULL,0,0}; + int addr_type; + update = 0; + + /* Rule 0: Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + addr_type = __ipv6_addr_type(&ifp->addr); + if (addr_type == IPV6_ADDR_ANY || + addr_type&IPV6_ADDR_MULTICAST) + continue; + + /* Rule 1: Prefer same address */ + if (ipv6_addr_cmp(&ifp->addr, daddr) == 0) + temp.flags |= IPV6_SADDRSELECT_SELF; + else + temp.flags &= ~IPV6_SADDRSELECT_SELF; + update = (temp.flags&IPV6_SADDRSELECT_SELF) - + (candidate.flags&IPV6_SADDRSELECT_SELF); + if (update < 0) { + continue; + } + + /* Rule 2: Prefer appropriate scope */ + temp.scope = __ipv6_addr_src_scope(addr_type); + if (!update) { + update = temp.scope - candidate.scope; + if (update > 0) { + update = candidate.scope < daddr_scope ? 1 : -1; + } else if (update < 0) { + update = temp.scope < daddr_scope ? 
-1 : 1; } } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - } + if (update < 0) { + continue; + } - if (scope == IFA_LINK) - goto out; + /* Rule 3: Avoid deprecated address */ + if (!(ifp->flags & IFA_F_DEPRECATED)) + temp.flags |= IPV6_SADDRSELECT_PREFERRED; + else + temp.flags &= ~IPV6_SADDRSELECT_PREFERRED; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_PREFERRED) - + (candidate.flags&IPV6_SADDRSELECT_PREFERRED); + if (update < 0) { + continue; + } - /* - * dev == NULL or search failed for specified dev - */ + /* XXX: Rule 4: Prefer home address */ - read_lock(&dev_base_lock); - read_lock(&addrconf_lock); - for (dev = dev_base; dev; dev=dev->next) { - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { - in6_ifa_hold(ifp); - read_unlock_bh(&idev->lock); - goto out_unlock_base; - } - - if (!match && !(ifp->flags&IFA_F_TENTATIVE)) { - match = ifp; - in6_ifa_hold(ifp); - } - } + /* Rule 5: Prefer outgoing interface */ + if (daddr_dev == NULL || ifp->idev == NULL || + daddr_dev == ifp->idev->dev) + temp.flags |= IPV6_SADDRSELECT_INTERFACE; + else + temp.flags &= ~IPV6_SADDRSELECT_INTERFACE; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_INTERFACE) - + (candidate.flags&IPV6_SADDRSELECT_INTERFACE); + if (update < 0) { + continue; + } + + /* XXX: Rule 6: Prefer matching label */ + if (ipv6_addrselect_label_lookup(&ifp->addr, dev->ifindex) == daddr_label) + temp.flags |= IPV6_SADDRSELECT_LABEL; + else + temp.flags &= ~IPV6_SADDRSELECT_LABEL; + if (!update) + update = (temp.flags&IPV6_SADDRSELECT_LABEL) - + (candidate.flags&IPV6_SADDRSELECT_LABEL); + if (update < 0) { + continue; + } + + /* XXX: Rule 7: Prefer public address */ + + /* Rule 8: Use longest matching prefix */ + temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr); + if (!update) + update = temp.matchlen - 
candidate.matchlen; + if (update < 0) { + continue; } - read_unlock_bh(&idev->lock); + + /* Final Rule */ + if (update <= 0) + continue; + + /* update candidate */ + temp.ifp = ifp; + in6_ifa_hold(ifp); + if (candidate.ifp) + in6_ifa_put(candidate.ifp); + candidate = temp; } + read_unlock_bh(&idev->lock); } - -out_unlock_base: read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); - -out: - if (ifp == NULL) { - ifp = match; - match = NULL; - } - err = -EADDRNOTAVAIL; - if (ifp) { - ipv6_addr_copy(saddr, &ifp->addr); + if (candidate.ifp) { + ipv6_addr_copy(saddr, &candidate.ifp->addr); + in6_ifa_put(candidate.ifp); err = 0; - in6_ifa_put(ifp); + } else { + err = -EADDRNOTAVAIL; } - if (match) - in6_ifa_put(match); - return err; } +int ipv6_get_saddr(struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_dev : NULL, + daddr, saddr); +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; @@ -636,6 +749,69 @@ read_unlock_bh(&addrconf_hash_lock); return ifp; +} + +/* address selection: default policy label */ +/* XXX: user level configuration */ +static struct ipv6_addrselect_label { + struct in6_addr addr; + u16 plen; + u32 ifindex; + u32 label; +} ipv6_addrselect_label_table[] = { + /* ::1/128, label = 0 */ + { + .addr = {{{ [15] = 1 }}}, + .plen = 128, + .label = 0, + }, + /* ::/0, label = 1 */ + { + .plen = 0, + .label = 1, + }, + /* 2002::/16, label = 2 */ + { + .addr = {{{ 0x20, 0x02 }}}, + .plen = 16, + .label = 2, + }, + /* ::/96, label = 3 */ + { + .plen = 96, + .label = 3, + }, + /* ::ffff:0:0/96, label = 4 */ + { + .addr = {{{ [10] = 0xff, [11] = 0xff }}}, + .plen = 96, + .label = 4, + }, + /* sentinel */ + { + .label = 0xffffffff, + } +}; + +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, + int ifindex) +{ + struct ipv6_addrselect_label *p; + int plen, matchlen = -1; + u32 label = 0xffffffff; + + for (p = 
ipv6_addrselect_label_table; + p->label != 0xffffffff; + p++) { + if (ifindex && p->ifindex && ifindex != p->ifindex) + continue; + plen = ipv6_addr_diff(addr, &p->addr); + if (plen < p->plen || plen < matchlen) + continue; + matchlen = plen; + label = p->label; + } + return label; } /* Gets referenced address, destroys ifaddr */ ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-10-05 18:33 ` YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-10 14:29 ` Pekka Savola 2002-10-10 15:23 ` YOSHIFUJI Hideaki / 吉藤英明 0 siblings, 1 reply; 27+ messages in thread From: Pekka Savola @ 2002-10-10 14:29 UTC (permalink / raw) To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev, usagi On Sun, 6 Oct 2002, YOSHIFUJI Hideaki / 吉藤英明 wrote: Dave, Alexey.. I think there has to be a high-level decision on how to proceed here. I'm referring to the optimization. TCP or other connection-oriented protocols use this once per connection; UDP and the like probably once per packet. The latter is at least quite undesirable, as you pointed out. The question is how one can proceed here, what kind of caching or other type of approach should be taken. Putting the stuff in the routing table could work, but then this algorithm would have to be re-run always when there are changes in any address in the node. There might be other ways. -- Pekka Savola "Tell me of difficulties surmounted, Netcore Oy not those you stumble over and fall" Systems. Networks. Security. 
* Re: [PATCH] IPv6: Improvement of Source Address Selection 2002-10-10 14:29 ` Pekka Savola @ 2002-10-10 15:23 ` YOSHIFUJI Hideaki / 吉藤英明 0 siblings, 0 replies; 27+ messages in thread From: YOSHIFUJI Hideaki / 吉藤英明 @ 2002-10-10 15:23 UTC (permalink / raw) To: pekkas; +Cc: netdev, usagi In article <Pine.LNX.4.44.0210101724030.9287-100000@netcore.fi> (at Thu, 10 Oct 2002 17:29:38 +0300 (EEST)), Pekka Savola <pekkas@netcore.fi> says: > Dave, Alexey.. I think there has to be a high-level decision on how to > proceed here. I'm referring to the optimization. TCP or other > connection-oriented protocols use this once per connection; UDP and the > like probably once per packet. The latter is at least quite undesirable, > as you pointed out. Hmm, what we think is, performance-critical applications such as DVTS (Digital Video Transport System) will do bind(2) so the latter is not a fatal problem. We need improved source address selection for further feature(s) (like privacy extensions). Our code makes it easy to implement further rules, and the order of the calculation cost is the same as before: O(n), where n is the number of addresses. We think netdev and/or usagi can develop optimizations later, and we think people can survive until then. > Putting the stuff in the routing table could work, but then this algorithm > would have to be re-run always when there are changes in any address in > the node. There might be other ways. How about - store one (daddr,ddev,saddr,tstamp) set in sk - update addrconf_tstamp in addrconf_verify() (etc.) - check tstamp and addrconf_tstamp and use saddr if ok in saddr selection --yoshfuji ^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2002-10-10 15:23 UTC | newest] Thread overview: 27+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2002-09-27 15:17 [PATCH] IPv6: Improvement of Source Address Selection YOSHIFUJI Hideaki / 吉藤英明 2002-09-27 16:02 ` kuznet 2002-09-27 16:28 ` Pekka Savola 2002-09-27 16:55 ` kuznet 2002-09-28 1:28 ` David S. Miller 2002-09-28 2:28 ` kuznet 2002-09-28 2:34 ` Andi Kleen 2002-09-28 2:35 ` David S. Miller 2002-09-28 2:58 ` kuznet 2002-09-28 2:55 ` David S. Miller 2002-09-28 3:38 ` kuznet 2002-09-28 3:36 ` David S. Miller 2002-09-28 4:19 ` kuznet 2002-09-28 4:30 ` YOSHIFUJI Hideaki / 吉藤英明 2002-09-28 4:44 ` kuznet 2002-09-28 5:14 ` YOSHIFUJI Hideaki / 吉藤英明 2002-09-28 5:26 ` kuznet 2002-09-28 4:35 ` Pekka Savola 2002-09-28 5:00 ` kuznet 2002-09-28 5:24 ` Pekka Savola 2002-09-28 5:37 ` kuznet 2002-09-29 8:41 ` Pekka Savola 2002-10-03 16:50 ` YOSHIFUJI Hideaki / 吉藤英明 2002-10-04 6:32 ` Pekka Savola 2002-10-05 18:33 ` YOSHIFUJI Hideaki / 吉藤英明 2002-10-10 14:29 ` Pekka Savola 2002-10-10 15:23 ` YOSHIFUJI Hideaki / 吉藤英明
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).