* [RFC]: not possible to do policy routing for next hop on tunnels. @ 2010-09-01 12:21 Anders Franzen 2010-09-01 12:49 ` Eric Dumazet 0 siblings, 1 reply; 4+ messages in thread From: Anders Franzen @ 2010-09-01 12:21 UTC (permalink / raw) To: netdev@vger.kernel.org If I have multiple routing choices to reach a tunnel end point, I would like to mark different flows with iptables after the postrouting step of the primary route lookup; this would put an fwmark on the skb. The route would in my case resolve to an ip6_tunnel. In the tunnel a secondary route lookup is done to select the next hop for the tunnel end point. I would like to apply policy routing to the secondary lookup. This will not work, for two reasons: 1. None of the tunnels (ipip, gre, ip6_tunnel) I looked at takes the fwmark on the skb into account when performing the route lookup. 2. ip6_tunnel keeps a local dst cache, so it will never reroute as long as the current cache is valid. I wonder if there is a reason for not passing the fwmark to the tunnel route lookup for tunnels in general? And is the local dst cache for ip6_tunnel needed? Doesn't the routing subsystem already keep some form of internal cache (ip route ls cache)? Best Regards /Anders ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC]: not possible to do policy routing for next hop on tunnels. 2010-09-01 12:21 [RFC]: not possible to do policy routing for next hop on tunnels Anders Franzen @ 2010-09-01 12:49 ` Eric Dumazet 2010-09-01 13:27 ` Anders Franzen 0 siblings, 1 reply; 4+ messages in thread From: Eric Dumazet @ 2010-09-01 12:49 UTC (permalink / raw) To: Anders Franzen; +Cc: netdev@vger.kernel.org Le mercredi 01 septembre 2010 à 14:21 +0200, Anders Franzen a écrit : > If I have multiple routing choices to reach a tunnel end point, > I would like to mark different flows with iptables, after the > postrouting of the primary route look up, this would put an fwmark on > the skb. The route would in my case resolve to an ip6_tunnel. > > In the tunnel a secondary route lookup is done to select next hop for > the tunnel end point. > > I would like to apply policy routing to the secondary lookup. > > This will not work, for two reasons: > > 1. None of the tunnels (ipip, gre, ip6_tunnel) I looked at regards the > fwmark at the skb, when performing the route lookup. > > 2. ip6_tunnel is keeping a local dst cache, so it will never reroute as > long as the current cache is valid. > > > I wonder if there is a reason for not giving the fwmark at the tunnel > route for tunnels in general? > > And the local dst cache for ip6_tunnel, is it needed, is not the routing > subsystem keeping some form of internal cache (ip route ls cache). ip6_tunnel (or others) could be extended with a IP6_TNL_F_USE_ORIG_FWMARK, and in this case not use/cache the route. 
untested patch to get the idea : diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h index acb9ad6..bf22b03 100644 --- a/include/linux/ip6_tunnel.h +++ b/include/linux/ip6_tunnel.h @@ -16,6 +16,8 @@ #define IP6_TNL_F_MIP6_DEV 0x8 /* copy DSCP from the outer packet */ #define IP6_TNL_F_RCV_DSCP_COPY 0x10 +/* copy fwmark from inner packet */ +#define IP6_TNL_F_USE_ORIG_FWMARK 0x20 struct ip6_tnl_parm { char name[IFNAMSIZ]; /* name of tunnel device */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0fd027f..e31a91f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -858,7 +858,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int err = -1; int pkt_len; - if ((dst = ip6_tnl_dst_check(t)) != NULL) + if (!fl->mark && (dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { dst = ip6_route_output(net, NULL, fl); @@ -910,7 +910,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set(skb, fl->mark ? 
dst : dst_clone(dst)); skb->transport_header = skb->network_header; @@ -940,7 +940,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, stats->tx_errors++; stats->tx_aborted_errors++; } - ip6_tnl_dst_store(t, dst); + if (!fl->mark) + ip6_tnl_dst_store(t, dst); + return 0; tx_err_link_failure: stats->tx_carrier_errors++; @@ -976,6 +978,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) + fl.mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { @@ -1026,7 +1030,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); - + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) + fl.mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC]: not possible to do policy routing for next hop on tunnels. 2010-09-01 12:49 ` Eric Dumazet @ 2010-09-01 13:27 ` Anders Franzen 2010-09-01 13:37 ` Eric Dumazet 0 siblings, 1 reply; 4+ messages in thread From: Anders Franzen @ 2010-09-01 13:27 UTC (permalink / raw) To: Eric Dumazet; +Cc: netdev@vger.kernel.org Quick answer! It seems to do exactly what I want. Is there a corresponding 'fwmark inherit' parameter added to the ip command somewhere? Will this patch get into the kernel? If you need testing I will do that. /Anders On Wed, 2010-09-01 at 14:49 +0200, Eric Dumazet wrote: > Le mercredi 01 septembre 2010 à 14:21 +0200, Anders Franzen a écrit : > > If I have multiple routing choices to reach a tunnel end point, > > I would like to mark different flows with iptables, after the > > postrouting of the primary route look up, this would put an fwmark on > > the skb. The route would in my case resolve to an ip6_tunnel. > > > > In the tunnel a secondary route lookup is done to select next hop for > > the tunnel end point. > > > > I would like to apply policy routing to the secondary lookup. > > > > This will not work, for two reasons: > > > > 1. None of the tunnels (ipip, gre, ip6_tunnel) I looked at regards the > > fwmark at the skb, when performing the route lookup. > > > > 2. ip6_tunnel is keeping a local dst cache, so it will never reroute as > > long as the current cache is valid. > > > > > > I wonder if there is a reason for not giving the fwmark at the tunnel > > route for tunnels in general? > > > > And the local dst cache for ip6_tunnel, is it needed, is not the routing > > subsystem keeping some form of internal cache (ip route ls cache). > > ip6_tunnel (or others) could be extended with a > IP6_TNL_F_USE_ORIG_FWMARK, and in this case not use/cache the route. 
> > untested patch to get the idea : > > diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h > index acb9ad6..bf22b03 100644 > --- a/include/linux/ip6_tunnel.h > +++ b/include/linux/ip6_tunnel.h > @@ -16,6 +16,8 @@ > #define IP6_TNL_F_MIP6_DEV 0x8 > /* copy DSCP from the outer packet */ > #define IP6_TNL_F_RCV_DSCP_COPY 0x10 > +/* copy fwmark from inner packet */ > +#define IP6_TNL_F_USE_ORIG_FWMARK 0x20 > > struct ip6_tnl_parm { > char name[IFNAMSIZ]; /* name of tunnel device */ > diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c > index 0fd027f..e31a91f 100644 > --- a/net/ipv6/ip6_tunnel.c > +++ b/net/ipv6/ip6_tunnel.c > @@ -858,7 +858,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > int err = -1; > int pkt_len; > > - if ((dst = ip6_tnl_dst_check(t)) != NULL) > + if (!fl->mark && (dst = ip6_tnl_dst_check(t)) != NULL) > dst_hold(dst); > else { > dst = ip6_route_output(net, NULL, fl); > @@ -910,7 +910,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > skb = new_skb; > } > skb_dst_drop(skb); > - skb_dst_set(skb, dst_clone(dst)); > + skb_dst_set(skb, fl->mark ? 
dst : dst_clone(dst)); > > skb->transport_header = skb->network_header; > > @@ -940,7 +940,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > stats->tx_errors++; > stats->tx_aborted_errors++; > } > - ip6_tnl_dst_store(t, dst); > + if (!fl->mark) > + ip6_tnl_dst_store(t, dst); > + > return 0; > tx_err_link_failure: > stats->tx_carrier_errors++; > @@ -976,6 +978,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) > if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) > fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) > & IPV6_TCLASS_MASK; > + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) > + fl.mark = skb->mark; > > err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); > if (err != 0) { > @@ -1026,7 +1030,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) > fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); > if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) > fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); > - > + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) > + fl.mark = skb->mark; > err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); > if (err != 0) { > if (err == -EMSGSIZE) > > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC]: not possible to do policy routing for next hop on tunnels. 2010-09-01 13:27 ` Anders Franzen @ 2010-09-01 13:37 ` Eric Dumazet 0 siblings, 0 replies; 4+ messages in thread From: Eric Dumazet @ 2010-09-01 13:37 UTC (permalink / raw) To: Anders Franzen; +Cc: netdev@vger.kernel.org Le mercredi 01 septembre 2010 à 15:27 +0200, Anders Franzen a écrit : > > Quick answer! > > It seems do exactly what I want. > Is there a corresponding 'fwmark inherit' parameter added to the ip > command somewhere? > Will this patch get it into the kernel? > > If you need testing I will do that. Following iproute2 patch (against current git) should be applied. I suggest we continue by private mails to polish patches ;) Thanks diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h index acb9ad6..bf22b03 100644 --- a/include/linux/ip6_tunnel.h +++ b/include/linux/ip6_tunnel.h @@ -16,6 +16,8 @@ #define IP6_TNL_F_MIP6_DEV 0x8 /* copy DSCP from the outer packet */ #define IP6_TNL_F_RCV_DSCP_COPY 0x10 +/* copy fwmark from inner packet */ +#define IP6_TNL_F_USE_ORIG_FWMARK 0x20 struct ip6_tnl_parm { char name[IFNAMSIZ]; /* name of tunnel device */ diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index 203e4a3..2c406e3 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -51,7 +51,7 @@ static void usage(void) fprintf(stderr, " [ remote ADDR local ADDR ] [ dev PHYS_DEV ]\n"); fprintf(stderr, " [ encaplimit ELIM ]\n"); fprintf(stderr ," [ hoplimit TTL ] [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n"); - fprintf(stderr, " [ dscp inherit ]\n"); + fprintf(stderr, " [ dscp inherit ] [ fwmark inherit] \n"); fprintf(stderr, "\n"); fprintf(stderr, "Where: NAME := STRING\n"); fprintf(stderr, " ADDR := IPV6_ADDRESS\n"); @@ -103,6 +103,10 @@ static void print_tunnel(struct ip6_tnl_parm *p) if (p->flags & IP6_TNL_F_RCV_DSCP_COPY) printf(" dscp inherit"); + + if (p->flags & IP6_TNL_F_USE_ORIG_FWMARK) + printf(" fwmark inherit"); + } static int parse_args(int argc, char **argv, struct 
ip6_tnl_parm *p) @@ -197,6 +201,11 @@ static int parse_args(int argc, char **argv, struct ip6_tnl_parm *p) if (strcmp(*argv, "inherit") != 0) invarg("not inherit", *argv); p->flags |= IP6_TNL_F_RCV_DSCP_COPY; + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) + invarg("not inherit", *argv); + p->flags |= IP6_TNL_F_USE_ORIG_FWMARK; } else { if (strcmp(*argv, "name") == 0) { NEXT_ARG(); ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-09-01 13:37 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2010-09-01 12:21 [RFC]: not possible to do policy routing for next hop on tunnels Anders Franzen 2010-09-01 12:49 ` Eric Dumazet 2010-09-01 13:27 ` Anders Franzen 2010-09-01 13:37 ` Eric Dumazet
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.