From: Robert Shearman <rshearma@brocade.com>
To: Thomas Graf <tgraf@suug.ch>
Cc: <netdev@vger.kernel.org>,
"Eric W. Biederman" <ebiederm@xmission.com>,
roopa <roopa@cumulusnetworks.com>
Subject: Re: [RFC net-next 0/3] IP imposition of per-nh MPLS encap
Date: Tue, 2 Jun 2015 14:28:58 +0100 [thread overview]
Message-ID: <556DAF9A.9050505@brocade.com> (raw)
In-Reply-To: <20150602000603.GB18435@pox.localdomain>
On 02/06/15 01:06, Thomas Graf wrote:
> On 06/01/15 at 05:46pm, Robert Shearman wrote:
>> In order to be able to function as a Label Edge Router in an MPLS
>> network, it is necessary to be able to take IP packets and impose an
>> MPLS encap and forward them out. The traditional approach of setting
>> up an interface for each "tunnel" endpoint doesn't scale for the
>> common MPLS use-cases where each IP route tends to be assigned a
>> different label as encap.
>>
>> The solution suggested here for further discussion is to provide the
>> facility to define encap data on a per-nexthop basis using a new
>> netlink attribute, RTA_ENCAP, which would be opaque to the IPv4/IPv6
>> forwarding code, but interpreted by the virtual interface assigned to
>> the nexthop.
>
> RTA_ENCAP is currently a binary blob specific to each encapsulation
> type interface. I guess this should be converted to a set of nested
> Netlink attributes for each type of encap to make it extendible in
> the future.
Nesting attributes inside the RTA_ENCAP blob should be supported by the
patch series today. Something like this:
+/*
+ * Attribute types nested inside RTA_ENCAP. Each one carries a single
+ * field of the tunnel key (key.tun_id, key.ipv4_dst, ...).
+ */
+enum rta_tunnel_t {
+	RTA_TUN_UNSPEC,
+	RTA_TUN_ID,	/* key.tun_id, 64 bit */
+	RTA_TUN_DST,	/* key.ipv4_dst, 32 bit */
+	RTA_TUN_SRC,	/* key.ipv4_src, 32 bit */
+	RTA_TUN_TTL,	/* key.ipv4_ttl, 8 bit */
+	RTA_TUN_TOS,	/* key.ipv4_tos, 8 bit */
+	RTA_TUN_SPORT,	/* key.tp_src, 16 bit */
+	RTA_TUN_DPORT,	/* key.tp_dst, 16 bit */
+	RTA_TUN_FLAGS,	/* key.tun_flags, 16 bit */
+	__RTA_TUN_MAX,	/* sentinel: one past the last valid type */
+};
+
+/*
+ * Highest valid attribute type. Tables indexed by attribute type are
+ * declared with RTA_TUN_MAX + 1 entries, and the parser is given
+ * RTA_TUN_MAX as its upper bound, so this must name the last real
+ * attribute rather than the sentinel that follows it.
+ */
+#define RTA_TUN_MAX (__RTA_TUN_MAX - 1)
+
+/*
+ * Validation policy for the attributes nested inside RTA_ENCAP, indexed
+ * by RTA_TUN_* attribute type. Each entry names the integer type the
+ * attribute payload is expected to have.
+ */
+static const struct nla_policy tunnel_policy[RTA_TUN_MAX + 1] = {
+	/* tunnel identifier */
+	[RTA_TUN_ID]	= { .type = NLA_U64 },
+
+	/* IPv4 source and destination addresses */
+	[RTA_TUN_SRC]	= { .type = NLA_U32 },
+	[RTA_TUN_DST]	= { .type = NLA_U32 },
+
+	/* IPv4 TOS and TTL */
+	[RTA_TUN_TOS]	= { .type = NLA_U8 },
+	[RTA_TUN_TTL]	= { .type = NLA_U8 },
+
+	/* transport source and destination ports */
+	[RTA_TUN_SPORT]	= { .type = NLA_U16 },
+	[RTA_TUN_DPORT]	= { .type = NLA_U16 },
+
+	/* tunnel flags */
+	[RTA_TUN_FLAGS]	= { .type = NLA_U16 },
+};
+
+/*
+ * Parse the RTA_TUN_* attributes nested in @nla into tunnel info.
+ *
+ * @dev:   not referenced by this function
+ * @nla:   nested attribute containing the RTA_TUN_* attributes
+ * @encap: struct ip_tunnel_info to fill in; when NULL nothing is parsed
+ *         and only the size is returned
+ *
+ * Only key fields whose attribute is present in @nla are written; the
+ * options pointer and length are always cleared.
+ *
+ * Returns sizeof(struct ip_tunnel_info) on success, or the negative
+ * error code from nla_parse_nested().
+ */
+static int vxlan_parse_encap(const struct net_device *dev,
+			     const struct nlattr *nla,
+			     void *encap)
+{
+	struct nlattr *attrs[RTA_TUN_MAX + 1];
+	struct ip_tunnel_info *info = encap;
+	int rc;
+
+	/* No destination buffer: skip parsing and just report the size. */
+	if (!info)
+		return sizeof(struct ip_tunnel_info);
+
+	rc = nla_parse_nested(attrs, RTA_TUN_MAX, nla, tunnel_policy);
+	if (rc < 0)
+		return rc;
+
+	if (attrs[RTA_TUN_ID])
+		info->key.tun_id = nla_get_u64(attrs[RTA_TUN_ID]);
+	if (attrs[RTA_TUN_DST])
+		info->key.ipv4_dst = nla_get_be32(attrs[RTA_TUN_DST]);
+	if (attrs[RTA_TUN_SRC])
+		info->key.ipv4_src = nla_get_be32(attrs[RTA_TUN_SRC]);
+	if (attrs[RTA_TUN_TTL])
+		info->key.ipv4_ttl = nla_get_u8(attrs[RTA_TUN_TTL]);
+	if (attrs[RTA_TUN_TOS])
+		info->key.ipv4_tos = nla_get_u8(attrs[RTA_TUN_TOS]);
+	if (attrs[RTA_TUN_SPORT])
+		info->key.tp_src = nla_get_be16(attrs[RTA_TUN_SPORT]);
+	if (attrs[RTA_TUN_DPORT])
+		info->key.tp_dst = nla_get_be16(attrs[RTA_TUN_DPORT]);
+	if (attrs[RTA_TUN_FLAGS])
+		info->key.tun_flags = nla_get_u16(attrs[RTA_TUN_FLAGS]);
+
+	/* No tunnel options are carried in this encap. */
+	info->options = NULL;
+	info->options_len = 0;
+
+	return sizeof(struct ip_tunnel_info);
+}
+
+/*
+ * Emit the tunnel key stored in @encap as an RTA_ENCAP nest in @skb.
+ *
+ * @dev:       not referenced by this function
+ * @skb:       netlink message being built
+ * @encap_len: not referenced by this function
+ * @encap:     struct ip_tunnel_info whose key is dumped
+ *
+ * Returns 0 on success or -EMSGSIZE when @skb has no room for the nest
+ * or one of its attributes. On failure the partially written nest is
+ * cancelled so @skb is left as it was on entry.
+ */
+static int vxlan_fill_encap(const struct net_device *dev,
+			    struct sk_buff *skb, int encap_len,
+			    const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *encap_attr;
+
+	encap_attr = nla_nest_start(skb, RTA_ENCAP);
+	if (!encap_attr)
+		return -EMSGSIZE;
+
+	/*
+	 * The ports are read back with nla_get_be16(), so write them with
+	 * the matching big-endian helper; the bytes on the wire are the
+	 * same as with nla_put_u16().
+	 */
+	if (nla_put_u64(skb, RTA_TUN_ID, tun_info->key.tun_id) ||
+	    nla_put_be32(skb, RTA_TUN_DST, tun_info->key.ipv4_dst) ||
+	    nla_put_be32(skb, RTA_TUN_SRC, tun_info->key.ipv4_src) ||
+	    nla_put_u8(skb, RTA_TUN_TOS, tun_info->key.ipv4_tos) ||
+	    nla_put_u8(skb, RTA_TUN_TTL, tun_info->key.ipv4_ttl) ||
+	    nla_put_be16(skb, RTA_TUN_SPORT, tun_info->key.tp_src) ||
+	    nla_put_be16(skb, RTA_TUN_DPORT, tun_info->key.tp_dst) ||
+	    nla_put_u16(skb, RTA_TUN_FLAGS, tun_info->key.tun_flags))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, encap_attr);
+
+	return 0;
+
+nla_put_failure:
+	/* Trim the half-built nest so a truncated RTA_ENCAP is never sent. */
+	nla_nest_cancel(skb, encap_attr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Compare the RTA_TUN_* attributes nested in @nla with stored tunnel info.
+ *
+ * @dev:       not referenced by this function
+ * @nla:       nested attribute containing the RTA_TUN_* attributes
+ * @encap_len: not referenced by this function
+ * @encap:     struct ip_tunnel_info to compare against
+ *
+ * Attributes absent from @nla are not compared. Returns 0 when every
+ * attribute that is present equals the corresponding key field, 1 when
+ * any of them differs, or the negative error from nla_parse_nested().
+ */
+static int vxlan_match_encap(const struct net_device *dev,
+			     const struct nlattr *nla, int encap_len,
+			     const void *encap)
+{
+	const struct ip_tunnel_info *info = encap;
+	struct nlattr *attrs[RTA_TUN_MAX + 1];
+	int rc;
+
+	rc = nla_parse_nested(attrs, RTA_TUN_MAX, nla, tunnel_policy);
+	if (rc < 0)
+		return rc;
+
+	/* Any supplied attribute that disagrees with the key is a mismatch. */
+	if ((attrs[RTA_TUN_ID] &&
+	     info->key.tun_id != nla_get_u64(attrs[RTA_TUN_ID])) ||
+	    (attrs[RTA_TUN_DST] &&
+	     info->key.ipv4_dst != nla_get_be32(attrs[RTA_TUN_DST])) ||
+	    (attrs[RTA_TUN_SRC] &&
+	     info->key.ipv4_src != nla_get_be32(attrs[RTA_TUN_SRC])) ||
+	    (attrs[RTA_TUN_TTL] &&
+	     info->key.ipv4_ttl != nla_get_u8(attrs[RTA_TUN_TTL])) ||
+	    (attrs[RTA_TUN_TOS] &&
+	     info->key.ipv4_tos != nla_get_u8(attrs[RTA_TUN_TOS])) ||
+	    (attrs[RTA_TUN_SPORT] &&
+	     info->key.tp_src != nla_get_be16(attrs[RTA_TUN_SPORT])) ||
+	    (attrs[RTA_TUN_DPORT] &&
+	     info->key.tp_dst != nla_get_be16(attrs[RTA_TUN_DPORT])) ||
+	    (attrs[RTA_TUN_FLAGS] &&
+	     info->key.tun_flags != nla_get_u16(attrs[RTA_TUN_FLAGS])))
+		return 1;
+
+	return 0;
+}
+
static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
.kind = "vxlan",
.maxtype = IFLA_VXLAN_MAX,
@@ -2893,6 +3093,9 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
.get_size = vxlan_get_size,
.fill_info = vxlan_fill_info,
.get_link_net = vxlan_get_link_net,
+ .parse_encap = vxlan_parse_encap,
+ .fill_encap = vxlan_fill_encap,
+ .match_encap = vxlan_match_encap,
};
> What is your plan regarding the receive side and on the matching of
> encap fields? Storing the receive parameters is what led me to
> storing it in skb_shared_info.
No plan for the receive side and it wouldn't easily fit in with my
approach, so you'll need to implement that separately.
Thanks,
Rob
next prev parent reply other threads:[~2015-06-02 13:30 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-01 16:46 [RFC net-next 0/3] IP imposition of per-nh MPLS encap Robert Shearman
2015-06-01 16:46 ` [RFC net-next 1/3] net: infra for per-nexthop encap data Robert Shearman
2015-06-02 18:15 ` Eric W. Biederman
2015-06-01 16:46 ` [RFC net-next 2/3] ipv4: storing and retrieval of per-nexthop encap Robert Shearman
2015-06-02 16:01 ` roopa
2015-06-02 16:35 ` Robert Shearman
2015-06-01 16:46 ` [RFC net-next 3/3] mpls: new ipmpls device for encapsulating IP packets as mpls Robert Shearman
2015-06-02 16:15 ` roopa
2015-06-02 16:33 ` Robert Shearman
2015-06-02 18:57 ` roopa
2015-06-02 21:06 ` Robert Shearman
2015-06-03 18:43 ` Vivek Venkatraman
2015-06-04 18:46 ` Robert Shearman
2015-06-04 21:38 ` Vivek Venkatraman
2015-06-02 18:26 ` Eric W. Biederman
2015-06-02 21:37 ` Thomas Graf
2015-06-02 22:48 ` Eric W. Biederman
2015-06-02 23:23 ` Eric W. Biederman
2015-06-03 9:50 ` Thomas Graf
2015-06-02 0:06 ` [RFC net-next 0/3] IP imposition of per-nh MPLS encap Thomas Graf
2015-06-02 13:28 ` Robert Shearman [this message]
2015-06-02 21:43 ` Thomas Graf
2015-06-03 13:30 ` Robert Shearman
2015-06-02 15:31 ` roopa
2015-06-02 18:30 ` Eric W. Biederman
2015-06-02 18:39 ` roopa
2015-06-02 18:11 ` Eric W. Biederman
2015-06-02 20:57 ` Robert Shearman
2015-06-02 21:10 ` Eric W. Biederman
2015-06-02 22:15 ` Robert Shearman
2015-06-02 22:58 ` Eric W. Biederman
2015-06-04 15:12 ` Nicolas Dichtel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=556DAF9A.9050505@brocade.com \
--to=rshearma@brocade.com \
--cc=ebiederm@xmission.com \
--cc=netdev@vger.kernel.org \
--cc=roopa@cumulusnetworks.com \
--cc=tgraf@suug.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.