* [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
@ 2015-06-19 4:49 Roopa Prabhu
2015-06-19 6:59 ` Julian Anastasov
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: Roopa Prabhu @ 2015-06-19 4:49 UTC (permalink / raw)
To: ebiederm, rshearma, tgraf; +Cc: davem, netdev
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Introduces two netlink attributes RTA_ENCAP_TYPE and
RTA_ENCAP to support attaching encap information to ipv4 routes.
RTA_ENCAP is a nested attribute as suggested by Thomas
(and also as Robert had it in his series). RTA_ENCAP
netlink policy is declared by the light weight tunnel
drivers that support this encap type.
fib code calls the following for each nexthop:
- new route handler:
lwt build state (that parses RTA_ENCAP and returns
lwt state that lives in every fib_nh)
- del dump hanlder:
lwt release handler to release lwt state data
- route dump hanlder:
lwt dump encap to fill RTA_ENCAP data
- during input route lookup
sets dst->output to lwtunnel_output which
in turn calls the corresponding lwt tunnel
output function which applies the required
encap and xmits the packet
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
include/net/ip_fib.h | 7 ++-
include/net/route.h | 3 ++
include/uapi/linux/rtnetlink.h | 3 +-
net/ipv4/fib_frontend.c | 8 ++++
net/ipv4/fib_semantics.c | 93 +++++++++++++++++++++++++++++++++++++++-
net/ipv4/route.c | 33 +++++++++++++-
6 files changed, 142 insertions(+), 5 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 54271ed..49f18d7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -44,7 +44,9 @@ struct fib_config {
u32 fc_flow;
u32 fc_nlflags;
struct nl_info fc_nlinfo;
- };
+ struct nlattr *fc_encap;
+ u16 fc_encap_type;
+};
struct fib_info;
struct rtable;
@@ -89,6 +91,9 @@ struct fib_nh {
struct rtable __rcu * __percpu *nh_pcpu_rth_output;
struct rtable __rcu *nh_rth_input;
struct fnhe_hash_bucket __rcu *nh_exceptions;
+#ifdef CONFIG_LWTUNNEL
+ struct lwtunnel_state *nh_lwtstate;
+#endif
};
/*
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..39a6495 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -66,6 +66,9 @@ struct rtable {
struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
+#ifdef CONFIG_LWTUNNEL
+ struct lwtunnel_state *rt_lwtstate;
+#endif
};
static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f..6c089ad 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -308,6 +308,8 @@ enum rtattr_type_t {
RTA_VIA,
RTA_NEWDST,
RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
__RTA_MAX
};
@@ -357,7 +359,6 @@ struct rtvia {
};
/* RTM_CACHEINFO */
-
struct rta_cacheinfo {
__u32 rta_clntref;
__u32 rta_lastuse;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e..fbe0630 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -591,6 +591,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -656,6 +658,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_TABLE:
cfg->fc_table = nla_get_u32(attr);
break;
+ case RTA_ENCAP:
+ cfg->fc_encap = attr;
+ break;
+ case RTA_ENCAP_TYPE:
+ cfg->fc_encap_type = nla_get_u16(attr);
+ break;
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 28ec3c1..54dd287 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,6 +42,7 @@
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
#include "fib_lookup.h"
@@ -208,6 +209,10 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev);
+#ifdef CONFIG_LWTUNNEL
+ if (nexthop_nh->nh_lwtstate)
+ lwtunnel_state_put(nexthop_nh->nh_lwtstate);
+#endif
free_nh_exceptions(nexthop_nh);
rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
@@ -366,6 +371,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
if (fi->fib_nhs) {
+ size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */
@@ -374,8 +380,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
+#ifdef CONFIG_LWTUNNEL
+ /* grab encap info */
+ for_nexthops(fi) {
+ if (nh->nh_lwtstate) {
+ /* RTA_ENCAP_TYPE */
+ nh_encapsize += lwtunnel_get_encap_size(
+ nh->nh_lwtstate);
+ /* RTA_ENCAP */
+ nh_encapsize += nla_total_size(2);
+ }
+ } endfor_nexthops(fi);
+#endif
+
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
+ payload += nla_total_size((fi->fib_nhs * nhsize) +
+ nh_encapsize);
+
}
return payload;
@@ -452,6 +473,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ int ret;
+
change_nexthops(fi) {
int attrlen;
@@ -475,12 +499,40 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
if (nexthop_nh->nh_tclassid)
fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
+#ifdef CONFIG_LWTUNNEL
+ nla = nla_find(attrs, attrlen, RTA_ENCAP);
+ if (nla) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ struct nlattr *nla_entype;
+
+ nla_entype = nla_find(attrs, attrlen,
+ RTA_ENCAP_TYPE);
+ if (!nla_entype)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ ret = lwtunnel_build_state(dev, nla_get_u16(
+ nla_entype),
+ nla, &lwtstate);
+ if (ret)
+ goto errout;
+ lwtunnel_state_get(lwtstate);
+ nexthop_nh->nh_lwtstate = lwtstate;
+ }
+#endif
}
rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
return 0;
+
+err_inval:
+ ret = -EINVAL;
+
+errout:
+ return ret;
}
#endif
@@ -875,6 +927,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
} else {
struct fib_nh *nh = fi->fib_nh;
+#ifdef CONFIG_LWTUNNEL
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ int ret;
+
+ if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ ret = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, &lwtstate);
+ if (ret)
+ goto err_inval;
+
+ lwtunnel_state_get(lwtstate);
+ nh->nh_lwtstate = lwtstate;
+ }
+#endif
nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags;
@@ -1034,7 +1105,17 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
goto nla_put_failure;
#endif
+#ifdef CONFIG_LWTUNNEL
+ if (fi->fib_nh->nh_lwtstate) {
+ struct lwtunnel_state *lwtstate;
+
+ lwtstate = fi->fib_nh->nh_lwtstate;
+ if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type))
+ goto nla_put_failure;
+ lwtunnel_fill_encap(skb, lwtstate);
+ }
}
+#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
struct rtnexthop *rtnh;
@@ -1061,6 +1142,16 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
goto nla_put_failure;
#endif
+#ifdef CONFIG_LWTUNNEL
+ if (nh->nh_lwtstate) {
+ struct lwtunnel_state *lwtstate;
+
+ lwtstate = nh->nh_lwtstate;
+ if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type))
+ goto nla_put_failure;
+ lwtunnel_fill_encap(skb, lwtstate);
+ }
+#endif
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f605598..c6549f9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -102,6 +102,7 @@
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
+#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
@@ -1355,6 +1356,9 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
list_del(&rt->rt_uncached);
spin_unlock_bh(&ul->lock);
}
+#ifdef CONFIG_LWTUNNEL
+ lwtunnel_state_put(rt->rt_lwtstate);
+#endif
}
void rt_flush_dev(struct net_device *dev)
@@ -1403,6 +1407,14 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
+#ifdef CONFIG_LWTUNNEL
+ if (nh->nh_lwtstate) {
+ lwtunnel_state_get(nh->nh_lwtstate);
+ rt->rt_lwtstate = nh->nh_lwtstate;
+ } else {
+ rt->rt_lwtstate = NULL;
+ }
+#endif
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
@@ -1488,6 +1500,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+#ifdef CONFIG_LWTUNNEL
+ rth->rt_lwtstate = NULL;
+#endif
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1618,12 +1633,19 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+#ifdef CONFIG_LWTUNNEL
+ rth->rt_lwtstate = NULL;
+#endif
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
+#ifdef CONFIG_LWTUNNEL
+ if (lwtunnel_output_redirect(rth->rt_lwtstate))
+ rth->dst.output = lwtunnel_output;
+#endif
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1792,6 +1814,9 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+#ifdef CONFIG_LWTUNNEL
+ rth->rt_lwtstate = NULL;
+#endif
RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -1981,7 +2006,9 @@ add:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
-
+#ifdef CONFIG_LWTUNNEL
+ rth->rt_lwtstate = NULL;
+#endif
RT_CACHE_STAT_INC(out_slow_tot);
if (flags & RTCF_LOCAL)
@@ -2261,7 +2288,9 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
-
+#ifdef CONFIG_LWTUNNEL
+ rt->rt_lwtstate = NULL;
+#endif
dst_free(new);
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 4:49 [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes Roopa Prabhu
@ 2015-06-19 6:59 ` Julian Anastasov
2015-06-19 14:19 ` roopa
2015-06-19 15:19 ` Robert Shearman
` (2 subsequent siblings)
3 siblings, 1 reply; 15+ messages in thread
From: Julian Anastasov @ 2015-06-19 6:59 UTC (permalink / raw)
To: Roopa Prabhu; +Cc: ebiederm, rshearma, tgraf, davem, netdev
Hello,
On Thu, 18 Jun 2015, Roopa Prabhu wrote:
> @@ -366,6 +371,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
> payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
>
> if (fi->fib_nhs) {
> + size_t nh_encapsize = 0;
Var not in #ifdef. Any warnings with CONFIG_LWTUNNEL=n?
> /* Also handles the special case fib_nhs == 1 */
>
> /* each nexthop is packed in an attribute */
> @@ -374,8 +380,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
> /* may contain flow and gateway attribute */
> nhsize += 2 * nla_total_size(4);
>
> +#ifdef CONFIG_LWTUNNEL
> + /* grab encap info */
> + for_nexthops(fi) {
> + if (nh->nh_lwtstate) {
> + /* RTA_ENCAP_TYPE */
> + nh_encapsize += lwtunnel_get_encap_size(
> + nh->nh_lwtstate);
New labels not in #ifdef:
> +
> +err_inval:
> + ret = -EINVAL;
> +
> +errout:
> + return ret;
> }
Some other places may need changes:
- nh_comp: there is logic that decides if same fib_info
is reused from many fib nodes. There should be check
if NH matches by nh_lwtstate.
- xfrm4_fill_dst: not sure about this but some fields
are copied.
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 6:59 ` Julian Anastasov
@ 2015-06-19 14:19 ` roopa
2015-06-19 14:55 ` Robert Shearman
0 siblings, 1 reply; 15+ messages in thread
From: roopa @ 2015-06-19 14:19 UTC (permalink / raw)
To: Julian Anastasov; +Cc: ebiederm, rshearma, tgraf, davem, netdev
On 6/18/15, 11:59 PM, Julian Anastasov wrote:
> Hello,
>
> On Thu, 18 Jun 2015, Roopa Prabhu wrote:
>
>> @@ -366,6 +371,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
>> payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
>>
>> if (fi->fib_nhs) {
>> + size_t nh_encapsize = 0;
> Var not in #ifdef. Any warnings with CONFIG_LWTUNNEL=n?
>
>> /* Also handles the special case fib_nhs == 1 */
>>
>> /* each nexthop is packed in an attribute */
>> @@ -374,8 +380,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
>> /* may contain flow and gateway attribute */
>> nhsize += 2 * nla_total_size(4);
>>
>> +#ifdef CONFIG_LWTUNNEL
>> + /* grab encap info */
>> + for_nexthops(fi) {
>> + if (nh->nh_lwtstate) {
>> + /* RTA_ENCAP_TYPE */
>> + nh_encapsize += lwtunnel_get_encap_size(
>> + nh->nh_lwtstate);
> New labels not in #ifdef:
Will check and fix all warnings with CONFIG_LWTUNNEL off
>
>> +
>> +err_inval:
>> + ret = -EINVAL;
>> +
>> +errout:
>> + return ret;
>> }
> Some other places may need changes:
>
> - nh_comp: there is logic that decides if same fib_info
> is reused from many fib nodes. There should be check
> if NH matches by nh_lwtstate.
yes, i will add that.
>
> - xfrm4_fill_dst: not sure about this but some fields
> are copied.
>
I have not picked up xfrm4_fill_dst specifically, but this infra is
supposed to be similar to that.
I will look.
Thanks.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 14:19 ` roopa
@ 2015-06-19 14:55 ` Robert Shearman
2015-06-19 15:15 ` roopa
0 siblings, 1 reply; 15+ messages in thread
From: Robert Shearman @ 2015-06-19 14:55 UTC (permalink / raw)
To: roopa, Julian Anastasov; +Cc: ebiederm, tgraf, davem, netdev
On 19/06/15 15:19, roopa wrote:
> On 6/18/15, 11:59 PM, Julian Anastasov wrote:
>> Hello,
>>
>> On Thu, 18 Jun 2015, Roopa Prabhu wrote:
>>
>>> @@ -366,6 +371,7 @@ static inline size_t fib_nlmsg_size(struct
>>> fib_info *fi)
>>> payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
>>> if (fi->fib_nhs) {
>>> + size_t nh_encapsize = 0;
>> Var not in #ifdef. Any warnings with CONFIG_LWTUNNEL=n?
>>
>>> /* Also handles the special case fib_nhs == 1 */
>>> /* each nexthop is packed in an attribute */
>>> @@ -374,8 +380,23 @@ static inline size_t fib_nlmsg_size(struct
>>> fib_info *fi)
>>> /* may contain flow and gateway attribute */
>>> nhsize += 2 * nla_total_size(4);
>>> +#ifdef CONFIG_LWTUNNEL
>>> + /* grab encap info */
>>> + for_nexthops(fi) {
>>> + if (nh->nh_lwtstate) {
>>> + /* RTA_ENCAP_TYPE */
>>> + nh_encapsize += lwtunnel_get_encap_size(
>>> + nh->nh_lwtstate);
>> New labels not in #ifdef:
> Will check and fix all warnings with CONFIG_LWTUNNEL off
>>
>>> +
>>> +err_inval:
>>> + ret = -EINVAL;
>>> +
>>> +errout:
>>> + return ret;
>>> }
>> Some other places may need changes:
>>
>> - nh_comp: there is logic that decides if same fib_info
>> is reused from many fib nodes. There should be check
>> if NH matches by nh_lwtstate.
>
> yes, i will add that.
One other place - fib_nh_match. This is used when deleting a route to
verify that any supplied rtnetlink properties match the route in the fib.
Thanks,
Rob
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 14:55 ` Robert Shearman
@ 2015-06-19 15:15 ` roopa
0 siblings, 0 replies; 15+ messages in thread
From: roopa @ 2015-06-19 15:15 UTC (permalink / raw)
To: Robert Shearman; +Cc: Julian Anastasov, ebiederm, tgraf, davem, netdev
On 6/19/15, 7:55 AM, Robert Shearman wrote:
> On 19/06/15 15:19, roopa wrote:
>> On 6/18/15, 11:59 PM, Julian Anastasov wrote:
>>> Some other places may need changes:
>>>
>>> - nh_comp: there is logic that decides if same fib_info
>>> is reused from many fib nodes. There should be check
>>> if NH matches by nh_lwtstate.
>>
>> yes, i will add that.
>
> One other place - fib_nh_match. This is used when deleting a route to
> verify that any supplied rtnetlink properties match the route in the fib.
ack, thanks!.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 4:49 [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes Roopa Prabhu
2015-06-19 6:59 ` Julian Anastasov
@ 2015-06-19 15:19 ` Robert Shearman
2015-06-19 15:28 ` roopa
2015-06-21 20:20 ` Thomas Graf
2015-07-03 10:00 ` Summary lightweight tunnel discussion at NFWS Thomas Graf
3 siblings, 1 reply; 15+ messages in thread
From: Robert Shearman @ 2015-06-19 15:19 UTC (permalink / raw)
To: Roopa Prabhu, ebiederm, tgraf; +Cc: davem, netdev
On 19/06/15 05:49, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> Introduces two netlink attributes RTA_ENCAP_TYPE and
> RTA_ENCAP to support attaching encap information to ipv4 routes.
Surely RTA_ENCAP_TYPE should be part of RTA_ENCAP, since the type
doesn't make sense without the data and vice versa?
>
> RTA_ENCAP is a nested attribute as suggested by Thomas
> (and also as Robert had it in his series). RTA_ENCAP
> netlink policy is declared by the light weight tunnel
> drivers that support this encap type.
>
> fib code calls the following for each nexthop:
> - new route handler:
> lwt build state (that parses RTA_ENCAP and returns
> lwt state that lives in every fib_nh)
> - del dump hanlder:
> lwt release handler to release lwt state data
> - route dump hanlder:
> lwt dump encap to fill RTA_ENCAP data
> - during input route lookup
> sets dst->output to lwtunnel_output which
> in turn calls the corresponding lwt tunnel
> output function which applies the required
> encap and xmits the packet
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Thanks,
Rob
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 15:19 ` Robert Shearman
@ 2015-06-19 15:28 ` roopa
2015-06-19 17:17 ` Robert Shearman
0 siblings, 1 reply; 15+ messages in thread
From: roopa @ 2015-06-19 15:28 UTC (permalink / raw)
To: Robert Shearman; +Cc: ebiederm, tgraf, davem, netdev
On 6/19/15, 8:19 AM, Robert Shearman wrote:
> On 19/06/15 05:49, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> Introduces two netlink attributes RTA_ENCAP_TYPE and
>> RTA_ENCAP to support attaching encap information to ipv4 routes.
>
> Surely RTA_ENCAP_TYPE should be part of RTA_ENCAP, since the type
> doesn't make sense without the data and vice versa?
I went back and forth on this. And started with what you are saying
above. But then I wanted RTA_ENCAP netlink policy to be declared by
individual lwtunnel drivers.
And to determine which RTA_ENCAP netlink policy to pick, you need to
know the RTA_ENCAP policy type (or lwtunnel type)
which is encoded in RTA_ENCAP_TYPE. And I did not want to introduce
another level of nest in RTA_ENCAP (because for nexthops we are already
2 levels deep when parsing RTA_ENCAP).
Hence, fib code first looks for RTA_ENCAP and if RTA_ENCAP is specified,
RTA_ENCAP_TYPE is a required attribute. My iproute2 patches handles this
and makes sure
there is an RTA_ENCAP_TYPE specified with RTA_ENCAP.
thanks,
Roopa
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 15:28 ` roopa
@ 2015-06-19 17:17 ` Robert Shearman
2015-06-19 18:42 ` roopa
0 siblings, 1 reply; 15+ messages in thread
From: Robert Shearman @ 2015-06-19 17:17 UTC (permalink / raw)
To: roopa; +Cc: ebiederm, tgraf, davem, netdev
On 19/06/15 16:28, roopa wrote:
> On 6/19/15, 8:19 AM, Robert Shearman wrote:
>> On 19/06/15 05:49, Roopa Prabhu wrote:
>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>
>>> Introduces two netlink attributes RTA_ENCAP_TYPE and
>>> RTA_ENCAP to support attaching encap information to ipv4 routes.
>>
>> Surely RTA_ENCAP_TYPE should be part of RTA_ENCAP, since the type
>> doesn't make sense without the data and vice versa?
> I went back and forth on this. And started with what you are saying
> above. But then I wanted RTA_ENCAP netlink policy to be declared by
> individual lwtunnel drivers.
> And to determine which RTA_ENCAP netlink policy to pick, you need to
> know the RTA_ENCAP policy type (or lwtunnel type)
> which is encoded in RTA_ENCAP_TYPE. And I did not want to introduce
> another level of nest in RTA_ENCAP (because for nexthops we are already
> 2 levels deep when parsing RTA_ENCAP).
No need for that - use the example of how RTA_MULTIPATH is used for
ipv4/ipv6:
+----------------------+
| RTA_MULTIPATH |
+----------------------+
| +------------------+ |
| | struct rtnexthop | |
| +------------------+ |
| | RTA_GATEWAY, etc.| |
| +------------------+ |
+----------------------+
You could do similar for RTA_ENCAP where the type is stored in the data
prior to the nested attributes starting. E.g.:
+----------------------+
| RTA_ENCAP |
+----------------------+
| +------------------+ |
| | struct rtencap | |
| +------------------+ |
| | MPLS_IPTUNNEL_DST| |
| +------------------+ |
+----------------------+
struct rtencap {
__u16 rte_type;
};
>
> Hence, fib code first looks for RTA_ENCAP and if RTA_ENCAP is specified,
> RTA_ENCAP_TYPE is a required attribute. My iproute2 patches handles this
> and makes sure
> there is an RTA_ENCAP_TYPE specified with RTA_ENCAP.
No doubt, but surely it's better to present an unambiguous API to
userspace if possible?
Thanks,
Rob
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 17:17 ` Robert Shearman
@ 2015-06-19 18:42 ` roopa
0 siblings, 0 replies; 15+ messages in thread
From: roopa @ 2015-06-19 18:42 UTC (permalink / raw)
To: Robert Shearman; +Cc: ebiederm, tgraf, davem, netdev
On 6/19/15, 10:17 AM, Robert Shearman wrote:
>
> No need for that - use the example of how RTA_MULTIPATH is used for
> ipv4/ipv6:
>
> +----------------------+
> | RTA_MULTIPATH |
> +----------------------+
> | +------------------+ |
> | | struct rtnexthop | |
> | +------------------+ |
> | | RTA_GATEWAY, etc.| |
> | +------------------+ |
> +----------------------+
>
> You could do similar for RTA_ENCAP where the type is stored in the
> data prior to the nested attributes starting. E.g.:
>
> +----------------------+
> | RTA_ENCAP |
> +----------------------+
> | +------------------+ |
> | | struct rtencap | |
> | +------------------+ |
> | | MPLS_IPTUNNEL_DST| |
> | +------------------+ |
> +----------------------+
>
> struct rtencap {
> __u16 rte_type;
> };
I did think about that...but today the rtnextop seems like it was
written a struct initially and then extended with attributes only
because the struct could not be extended (I maybe wrong). But half the
fields are in a struct and the others are attributes. It gets confusing.
And i was trying to avoid that.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-19 4:49 [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes Roopa Prabhu
2015-06-19 6:59 ` Julian Anastasov
2015-06-19 15:19 ` Robert Shearman
@ 2015-06-21 20:20 ` Thomas Graf
2015-06-22 2:30 ` roopa
2015-07-03 10:00 ` Summary lightweight tunnel discussion at NFWS Thomas Graf
3 siblings, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-06-21 20:20 UTC (permalink / raw)
To: Roopa Prabhu; +Cc: ebiederm, rshearma, davem, netdev
On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> Introduces two netlink attributes RTA_ENCAP_TYPE and
> RTA_ENCAP to support attaching encap information to ipv4 routes.
>
> RTA_ENCAP is a nested attribute as suggested by Thomas
> (and also as Robert had it in his series). RTA_ENCAP
> netlink policy is declared by the light weight tunnel
> drivers that support this encap type.
>
> fib code calls the following for each nexthop:
> - new route handler:
> lwt build state (that parses RTA_ENCAP and returns
> lwt state that lives in every fib_nh)
> - del dump hanlder:
> lwt release handler to release lwt state data
> - route dump hanlder:
> lwt dump encap to fill RTA_ENCAP data
> - during input route lookup
> sets dst->output to lwtunnel_output which
> in turn calls the corresponding lwt tunnel
> output function which applies the required
> encap and xmits the packet
Thanks for putting in the flag! I think introducing helpers for the
lwt work would help as it can centralize the required ifdef magic
and be defined as a static inline nop or macros so the core routing
code doesn't need to have ifdefs spread around.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes
2015-06-21 20:20 ` Thomas Graf
@ 2015-06-22 2:30 ` roopa
0 siblings, 0 replies; 15+ messages in thread
From: roopa @ 2015-06-22 2:30 UTC (permalink / raw)
To: Thomas Graf; +Cc: ebiederm, rshearma, davem, netdev
On 6/21/15, 1:20 PM, Thomas Graf wrote:
> On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> Introduces two netlink attributes RTA_ENCAP_TYPE and
>> RTA_ENCAP to support attaching encap information to ipv4 routes.
>>
>> RTA_ENCAP is a nested attribute as suggested by Thomas
>> (and also as Robert had it in his series). RTA_ENCAP
>> netlink policy is declared by the light weight tunnel
>> drivers that support this encap type.
>>
>> fib code calls the following for each nexthop:
>> - new route handler:
>> lwt build state (that parses RTA_ENCAP and returns
>> lwt state that lives in every fib_nh)
>> - del dump hanlder:
>> lwt release handler to release lwt state data
>> - route dump hanlder:
>> lwt dump encap to fill RTA_ENCAP data
>> - during input route lookup
>> sets dst->output to lwtunnel_output which
>> in turn calls the corresponding lwt tunnel
>> output function which applies the required
>> encap and xmits the packet
> Thanks for putting in the flag! I think introducing helpers for the
> lwt work would help as it can centralize the required ifdef magic
> and be defined as a static inline nop or macros so the core routing
> code doesn't need to have ifdefs spread around.
yes that would be better, let me see what I can do.
thanks,
^ permalink raw reply [flat|nested] 15+ messages in thread
* Summary lightweight tunnel discussion at NFWS
2015-06-19 4:49 [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes Roopa Prabhu
` (2 preceding siblings ...)
2015-06-21 20:20 ` Thomas Graf
@ 2015-07-03 10:00 ` Thomas Graf
2015-07-05 6:21 ` roopa
3 siblings, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-07-03 10:00 UTC (permalink / raw)
To: Roopa Prabhu; +Cc: ebiederm, rshearma, davem, netdev
On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
> +#ifdef CONFIG_LWTUNNEL
> + if (fi->fib_nh->nh_lwtstate) {
> + struct lwtunnel_state *lwtstate;
> +
> + lwtstate = fi->fib_nh->nh_lwtstate;
> + if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type))
> + goto nla_put_failure;
> + lwtunnel_fill_encap(skb, lwtstate);
> + }
> }
> +#endif
Misplaced #endif ;-)
Other than that I managed to rebase my changes onto yours and it
looks clean.
Since we also discussed this a bit at NFWS, I'm enclosing a quick
summary:
* Overall consensus that a lightweight flow based encapsulation
makes sense.
* Realization that what we actually want is stackable skb metadata
between layers without over engineering it.
* Consensus to avoid adding it to skb_shared_info and try to reuse
the skb dst field.
* New dst_metadata type similar to xfrm_dst which can carry metadata
such as encapsulation instructions/information.
* Can be made stackable to implement nested encapsulation if needed.
Left out in the beginning to keep it simple.
* Possible optimization option by putting the dst_metadata into a
per cpu scratch buffer or stack without taking a reference and
only force the reference & allocation when the skb is about to
be queued. The regular fast path should never queue a skb with
dst metadata attached.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: Summary lightweight tunnel discussion at NFWS
2015-07-03 10:00 ` Summary lightweight tunnel discussion at NFWS Thomas Graf
@ 2015-07-05 6:21 ` roopa
2015-07-06 13:03 ` Thomas Graf
0 siblings, 1 reply; 15+ messages in thread
From: roopa @ 2015-07-05 6:21 UTC (permalink / raw)
To: Thomas Graf; +Cc: ebiederm, rshearma, davem, netdev
On 7/3/15, 3:00 AM, Thomas Graf wrote:
> On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
>> +#ifdef CONFIG_LWTUNNEL
>> + if (fi->fib_nh->nh_lwtstate) {
>> + struct lwtunnel_state *lwtstate;
>> +
>> + lwtstate = fi->fib_nh->nh_lwtstate;
>> + if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type))
>> + goto nla_put_failure;
>> + lwtunnel_fill_encap(skb, lwtstate);
>> + }
>> }
>> +#endif
> Misplaced #endif ;-)
Thx. I have fixed this since,...did not realize it came in as part of
this RFC series.
>
> Other than that I managed to rebase my changes onto yours and it
> looks clean.
Glad to know!. thanks Thomas. I had a few more changes (mostly
cleanup/bug fixes, ipv6 support and mostly earlier feedback from you)
in my local clone, pushed it to my github tree just now.
This also tries to not use CONFIG_LWTUNNEL all over the place. I had it
that way initially also because of fib struct members
under #ifdef CONFIG_LWTUNNEL. (If we think at a later point that it is
better to #ifdef CONFIG_LWTUNNEL fib struct members,
I can bring some of that back in). And, Only control path (rtnetlink)
for ipv6 mpls iptunnels has been tested.
>
> Since we also discussed this a bit at NFWS, I'm enclosing a quick
> summary:
>
> * Overall consensus that a lightweight flow based encapsulation
> makes sense.
> * Realization that what we actually want is stackable skb metadata
> between layers without over engineering it.
> * Consensus to avoid adding it to skb_shared_info and try to reuse
> the skb dst field.
> * New dst_metadata type similar to xfrm_dst which can carry metadata
> such as encapsulation instructions/information.
> * Can be made stackable to implement nested encapsulation if needed.
> Left out in the beginning to keep it simple.
> * Possible optimization option by putting the dst_metadata into a
> per cpu scratch buffer or stack without taking a reference and
> only force the reference & allocation when the skb is about to
> be queued. The regular fast path should never queue a skb with
> dst metadata attached.
Thanks for the summary. this helps.
I have been thinking of moving lwtstate from rtable to struct dst_entry.
I will also look at the dst_metadata.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: Summary lightweight tunnel discussion at NFWS
2015-07-05 6:21 ` roopa
@ 2015-07-06 13:03 ` Thomas Graf
2015-07-06 15:24 ` roopa
0 siblings, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-07-06 13:03 UTC (permalink / raw)
To: roopa; +Cc: ebiederm, rshearma, davem, netdev
On 07/04/15 at 11:21pm, roopa wrote:
> Thx. I have fixed this since,...did not realize it came in as part of this
> RFC series.
> >
> >Other than that I managed to rebase my changes onto yours and it
> >looks clean.
> Glad to know!. thanks Thomas. I had a few more changes (mostly cleanup/bug
> fixes, ipv6 support and mostly earlier feedback from you)
> in my local clone, pushed it to my github tree just now.
> This also tries to not use CONFIG_LWTUNNEL all over the place. I had it that
> way initially also because of fib struct members
The integration into the routing code looks much better now. Any
chance you can rebase your tree and squash it into logical
commits? I'm rebasing again onto yours and I think we should submit
all of this in a single series so everything can be reviewed in a
single thread.
> under #ifdef CONFIG_LWTUNNEL. (If we think at a later point that it is
> better to #ifdef CONFIG_LWTUNNEL fib struct members,
> I can bring some of that back in). And, Only control path (rtnetlink) for
> ipv6 mpls iptunnels has been tested.
All of this is only useful if distros enable this by default which
minimize the value of a config option quite a bit. I think what you
have your latest tree is a good balance.
> I have been thinking of moving lwtstate from rtable to struct dst_entry.
> I will also look at the dst_metadata.
I thought about the same. The introduction of dst_metadata makes
that a bit pointless because we don't want to store a pointer to the
tunnel metadata but allocate the metadata as a dst directly to avoid
the indirection. We need the separate code paths anyway until v6 and
v4 routing are merged so we would save very little while penalizing
everyone except rtable and fib6.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: Summary lightweight tunnel discussion at NFWS
2015-07-06 13:03 ` Thomas Graf
@ 2015-07-06 15:24 ` roopa
0 siblings, 0 replies; 15+ messages in thread
From: roopa @ 2015-07-06 15:24 UTC (permalink / raw)
To: Thomas Graf; +Cc: ebiederm, rshearma, davem, netdev
On 7/6/15, 6:03 AM, Thomas Graf wrote:
> The integration into the routing code looks much better now. Any
> chance you can rebase your tree and squash it into logical
> commits?
Will do today.
> I'm rebasing again onto yours and I think we should submit
> all of this in a single series so everything can be reviewed in a
> single thread.
agreed.
>
> All of this is only useful if distros enable this by default which
> minimize the value of a config option quite a bit. I think what you
> have your latest tree is a good balance.
ok
>
>> I have been thinking of moving lwtstate from rtable to struct dst_entry.
>> I will also look at the dst_metadata.
> I thought about the same. The introduction of dst_metadata makes
> that a bit pointless because we don't want to store a pointer to the
> tunnel metadata but allocate the metadata as a dst directly to avoid
> the indirection. We need the separate code paths anyway until v6 and
> v4 routing are merged so we would save very little while penalizing
> everyone except rtable and fib6.
agreed,
thanks,
Roopa
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2015-07-06 15:24 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-06-19 4:49 [PATCH net-next RFC v2 2/3] ipv4: add support for light weight tunnel encap attributes Roopa Prabhu
2015-06-19 6:59 ` Julian Anastasov
2015-06-19 14:19 ` roopa
2015-06-19 14:55 ` Robert Shearman
2015-06-19 15:15 ` roopa
2015-06-19 15:19 ` Robert Shearman
2015-06-19 15:28 ` roopa
2015-06-19 17:17 ` Robert Shearman
2015-06-19 18:42 ` roopa
2015-06-21 20:20 ` Thomas Graf
2015-06-22 2:30 ` roopa
2015-07-03 10:00 ` Summary lightweight tunnel discussion at NFWS Thomas Graf
2015-07-05 6:21 ` roopa
2015-07-06 13:03 ` Thomas Graf
2015-07-06 15:24 ` roopa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).