From: Robert Shearman <rshearma@brocade.com>
To: Thomas Graf <tgraf@suug.ch>
Cc: <netdev@vger.kernel.org>,
"Eric W. Biederman" <ebiederm@xmission.com>,
roopa <roopa@cumulusnetworks.com>
Subject: Re: [RFC net-next 0/3] IP imposition of per-nh MPLS encap
Date: Tue, 2 Jun 2015 14:28:58 +0100
Message-ID: <556DAF9A.9050505@brocade.com>
In-Reply-To: <20150602000603.GB18435@pox.localdomain>
On 02/06/15 01:06, Thomas Graf wrote:
> On 06/01/15 at 05:46pm, Robert Shearman wrote:
>> In order to be able to function as a Label Edge Router in an MPLS
>> network, it is necessary to be able to take IP packets and impose an
>> MPLS encap and forward them out. The traditional approach of setting
>> up an interface for each "tunnel" endpoint doesn't scale for the
>> common MPLS use-cases where each IP route tends to be assigned a
>> different label as encap.
>>
>> The solution suggested here for further discussion is to provide the
>> facility to define encap data on a per-nexthop basis using a new
>> netlink attribute, RTA_ENCAP, which would be opaque to the IPv4/IPv6
>> forwarding code, but interpreted by the virtual interface assigned to
>> the nexthop.
>
> RTA_ENCAP is currently a binary blob specific to each encapsulation
> type interface. I guess this should be converted to a set of nested
> Netlink attributes for each type of encap to make it extendible in
> the future.
Nesting attributes inside the RTA_ENCAP blob should already be supported
by the patch series as posted. Something like this:
+enum rta_tunnel_t {
+	RTA_TUN_UNSPEC,
+	RTA_TUN_ID,
+	RTA_TUN_DST,
+	RTA_TUN_SRC,
+	RTA_TUN_TTL,
+	RTA_TUN_TOS,
+	RTA_TUN_SPORT,
+	RTA_TUN_DPORT,
+	RTA_TUN_FLAGS,
+	RTA_TUN_MAX,
+};
+
+static const struct nla_policy tunnel_policy[RTA_TUN_MAX + 1] = {
+	[RTA_TUN_ID]	= { .type = NLA_U64 },
+	[RTA_TUN_DST]	= { .type = NLA_U32 },
+	[RTA_TUN_SRC]	= { .type = NLA_U32 },
+	[RTA_TUN_TTL]	= { .type = NLA_U8 },
+	[RTA_TUN_TOS]	= { .type = NLA_U8 },
+	[RTA_TUN_SPORT]	= { .type = NLA_U16 },
+	[RTA_TUN_DPORT]	= { .type = NLA_U16 },
+	[RTA_TUN_FLAGS]	= { .type = NLA_U16 },
+};
+
+static int vxlan_parse_encap(const struct net_device *dev,
+			     const struct nlattr *nla,
+			     void *encap)
+{
+	if (encap) {
+		struct ip_tunnel_info *tun_info = encap;
+		struct nlattr *tb[RTA_TUN_MAX + 1];
+		int err;
+
+		err = nla_parse_nested(tb, RTA_TUN_MAX, nla, tunnel_policy);
+		if (err < 0)
+			return err;
+
+		if (tb[RTA_TUN_ID])
+			tun_info->key.tun_id = nla_get_u64(tb[RTA_TUN_ID]);
+
+		if (tb[RTA_TUN_DST])
+			tun_info->key.ipv4_dst = nla_get_be32(tb[RTA_TUN_DST]);
+
+		if (tb[RTA_TUN_SRC])
+			tun_info->key.ipv4_src = nla_get_be32(tb[RTA_TUN_SRC]);
+
+		if (tb[RTA_TUN_TTL])
+			tun_info->key.ipv4_ttl = nla_get_u8(tb[RTA_TUN_TTL]);
+
+		if (tb[RTA_TUN_TOS])
+			tun_info->key.ipv4_tos = nla_get_u8(tb[RTA_TUN_TOS]);
+
+		if (tb[RTA_TUN_SPORT])
+			tun_info->key.tp_src = nla_get_be16(tb[RTA_TUN_SPORT]);
+
+		if (tb[RTA_TUN_DPORT])
+			tun_info->key.tp_dst = nla_get_be16(tb[RTA_TUN_DPORT]);
+
+		if (tb[RTA_TUN_FLAGS])
+			tun_info->key.tun_flags = nla_get_u16(tb[RTA_TUN_FLAGS]);
+
+		tun_info->options = NULL;
+		tun_info->options_len = 0;
+	}
+
+	return sizeof(struct ip_tunnel_info);
+}
+
+static int vxlan_fill_encap(const struct net_device *dev,
+			    struct sk_buff *skb, int encap_len,
+			    const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *encap_attr;
+
+	encap_attr = nla_nest_start(skb, RTA_ENCAP);
+	if (!encap_attr)
+		return -ENOMEM;
+
+	if (nla_put_u64(skb, RTA_TUN_ID, tun_info->key.tun_id) ||
+	    nla_put_be32(skb, RTA_TUN_DST, tun_info->key.ipv4_dst) ||
+	    nla_put_be32(skb, RTA_TUN_SRC, tun_info->key.ipv4_src) ||
+	    nla_put_u8(skb, RTA_TUN_TOS, tun_info->key.ipv4_tos) ||
+	    nla_put_u8(skb, RTA_TUN_TTL, tun_info->key.ipv4_ttl) ||
+	    nla_put_be16(skb, RTA_TUN_SPORT, tun_info->key.tp_src) ||
+	    nla_put_be16(skb, RTA_TUN_DPORT, tun_info->key.tp_dst) ||
+	    nla_put_u16(skb, RTA_TUN_FLAGS, tun_info->key.tun_flags))
+		return -ENOMEM;
+
+	nla_nest_end(skb, encap_attr);
+
+	return 0;
+}
+
+static int vxlan_match_encap(const struct net_device *dev,
+			     const struct nlattr *nla, int encap_len,
+			     const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *tb[RTA_TUN_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, RTA_TUN_MAX, nla, tunnel_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[RTA_TUN_ID] &&
+	    tun_info->key.tun_id != nla_get_u64(tb[RTA_TUN_ID]))
+		return 1;
+
+	if (tb[RTA_TUN_DST] &&
+	    tun_info->key.ipv4_dst != nla_get_be32(tb[RTA_TUN_DST]))
+		return 1;
+
+	if (tb[RTA_TUN_SRC] &&
+	    tun_info->key.ipv4_src != nla_get_be32(tb[RTA_TUN_SRC]))
+		return 1;
+
+	if (tb[RTA_TUN_TTL] &&
+	    tun_info->key.ipv4_ttl != nla_get_u8(tb[RTA_TUN_TTL]))
+		return 1;
+
+	if (tb[RTA_TUN_TOS] &&
+	    tun_info->key.ipv4_tos != nla_get_u8(tb[RTA_TUN_TOS]))
+		return 1;
+
+	if (tb[RTA_TUN_SPORT] &&
+	    tun_info->key.tp_src != nla_get_be16(tb[RTA_TUN_SPORT]))
+		return 1;
+
+	if (tb[RTA_TUN_DPORT] &&
+	    tun_info->key.tp_dst != nla_get_be16(tb[RTA_TUN_DPORT]))
+		return 1;
+
+	if (tb[RTA_TUN_FLAGS] &&
+	    tun_info->key.tun_flags != nla_get_u16(tb[RTA_TUN_FLAGS]))
+		return 1;
+
+	return 0;
+}
+
 static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.kind			= "vxlan",
 	.maxtype		= IFLA_VXLAN_MAX,
@@ -2893,6 +3093,9 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.get_size		= vxlan_get_size,
 	.fill_info		= vxlan_fill_info,
 	.get_link_net		= vxlan_get_link_net,
+	.parse_encap		= vxlan_parse_encap,
+	.fill_encap		= vxlan_fill_encap,
+	.match_encap		= vxlan_match_encap,
 };
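For illustration only, here is a rough, untested userspace sketch of how a
route carrying such a nested RTA_ENCAP might be added via RTM_NEWROUTE using
libmnl. It assumes RTA_ENCAP and the RTA_TUN_* enum above end up exported in
the uapi headers; the destination prefix, ifindex and tunnel parameters are
placeholders, not anything the patches define:

/*
 * Hypothetical userspace sketch (not part of the patches): build an
 * RTM_NEWROUTE request whose single nexthop carries a nested RTA_ENCAP
 * with the RTA_TUN_* attributes proposed above. Values are made up.
 */
#include <arpa/inet.h>
#include <linux/rtnetlink.h>
#include <libmnl/libmnl.h>
#include <time.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct rtmsg *rtm;
	struct nlattr *encap;

	nlh->nlmsg_type = RTM_NEWROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
	nlh->nlmsg_seq = time(NULL);

	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = 32;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = RTPROT_STATIC;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;

	/* route destination and the vxlan netdev the nexthop points at */
	mnl_attr_put_u32(nlh, RTA_DST, inet_addr("192.0.2.1"));
	mnl_attr_put_u32(nlh, RTA_OIF, 4);	/* ifindex of the vxlan device */

	/* nested encap, to be interpreted by vxlan_parse_encap() above */
	encap = mnl_attr_nest_start(nlh, RTA_ENCAP);
	mnl_attr_put_u64(nlh, RTA_TUN_ID, 42);
	mnl_attr_put_u32(nlh, RTA_TUN_DST, inet_addr("198.51.100.2"));
	mnl_attr_put_u8(nlh, RTA_TUN_TTL, 64);
	mnl_attr_nest_end(nlh, encap);

	/* send nlh over an AF_NETLINK/NETLINK_ROUTE socket, e.g. with
	 * mnl_socket_open()/mnl_socket_sendto(), and wait for the ack */
	return 0;
}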
> What is your plan regarding the receive side and on the matching of
> encap fields? Storing the receive parameters is what led me to
> storing them in skb_shared_info.
I have no plans for the receive side, and it wouldn't fit easily into my
approach, so you'll need to implement that separately.
Thanks,
Rob