From: Robert Shearman <rshearma@brocade.com>
To: Thomas Graf <tgraf@suug.ch>
Cc: <netdev@vger.kernel.org>,
"Eric W. Biederman" <ebiederm@xmission.com>,
roopa <roopa@cumulusnetworks.com>
Subject: Re: [RFC net-next 0/3] IP imposition of per-nh MPLS encap
Date: Tue, 2 Jun 2015 14:28:58 +0100
Message-ID: <556DAF9A.9050505@brocade.com>
In-Reply-To: <20150602000603.GB18435@pox.localdomain>
On 02/06/15 01:06, Thomas Graf wrote:
> On 06/01/15 at 05:46pm, Robert Shearman wrote:
>> In order to be able to function as a Label Edge Router in an MPLS
>> network, it is necessary to be able to take IP packets and impose an
>> MPLS encap and forward them out. The traditional approach of setting
>> up an interface for each "tunnel" endpoint doesn't scale for the
>> common MPLS use-cases where each IP route tends to be assigned a
>> different label as encap.
>>
>> The solution suggested here for further discussion is to provide the
>> facility to define encap data on a per-nexthop basis using a new
>> netlink attribute, RTA_ENCAP, which would be opaque to the IPv4/IPv6
>> forwarding code, but interpreted by the virtual interface assigned to
>> the nexthop.
>
> RTA_ENCAP is currently a binary blob specific to each encapsulation
> type interface. I guess this should be converted to a set of nested
> Netlink attributes for each type of encap to make it extendible in
> the future.
Nesting attributes inside the RTA_ENCAP blob should already be supported
by the patch series as posted. Something like this:
+enum rta_tunnel_t {
+	RTA_TUN_UNSPEC,
+	RTA_TUN_ID,
+	RTA_TUN_DST,
+	RTA_TUN_SRC,
+	RTA_TUN_TTL,
+	RTA_TUN_TOS,
+	RTA_TUN_SPORT,
+	RTA_TUN_DPORT,
+	RTA_TUN_FLAGS,
+	RTA_TUN_MAX,
+};
+
+static const struct nla_policy tunnel_policy[RTA_TUN_MAX + 1] = {
+	[RTA_TUN_ID]	= { .type = NLA_U64 },
+	[RTA_TUN_DST]	= { .type = NLA_U32 },
+	[RTA_TUN_SRC]	= { .type = NLA_U32 },
+	[RTA_TUN_TTL]	= { .type = NLA_U8 },
+	[RTA_TUN_TOS]	= { .type = NLA_U8 },
+	[RTA_TUN_SPORT]	= { .type = NLA_U16 },
+	[RTA_TUN_DPORT]	= { .type = NLA_U16 },
+	[RTA_TUN_FLAGS]	= { .type = NLA_U16 },
+};
+
+static int vxlan_parse_encap(const struct net_device *dev,
+			     const struct nlattr *nla,
+			     void *encap)
+{
+	if (encap) {
+		struct ip_tunnel_info *tun_info = encap;
+		struct nlattr *tb[RTA_TUN_MAX + 1];
+		int err;
+
+		err = nla_parse_nested(tb, RTA_TUN_MAX, nla, tunnel_policy);
+		if (err < 0)
+			return err;
+
+		if (tb[RTA_TUN_ID])
+			tun_info->key.tun_id = nla_get_u64(tb[RTA_TUN_ID]);
+
+		if (tb[RTA_TUN_DST])
+			tun_info->key.ipv4_dst = nla_get_be32(tb[RTA_TUN_DST]);
+
+		if (tb[RTA_TUN_SRC])
+			tun_info->key.ipv4_src = nla_get_be32(tb[RTA_TUN_SRC]);
+
+		if (tb[RTA_TUN_TTL])
+			tun_info->key.ipv4_ttl = nla_get_u8(tb[RTA_TUN_TTL]);
+
+		if (tb[RTA_TUN_TOS])
+			tun_info->key.ipv4_tos = nla_get_u8(tb[RTA_TUN_TOS]);
+
+		if (tb[RTA_TUN_SPORT])
+			tun_info->key.tp_src = nla_get_be16(tb[RTA_TUN_SPORT]);
+
+		if (tb[RTA_TUN_DPORT])
+			tun_info->key.tp_dst = nla_get_be16(tb[RTA_TUN_DPORT]);
+
+		if (tb[RTA_TUN_FLAGS])
+			tun_info->key.tun_flags = nla_get_u16(tb[RTA_TUN_FLAGS]);
+
+		tun_info->options = NULL;
+		tun_info->options_len = 0;
+	}
+
+	return sizeof(struct ip_tunnel_info);
+}
+
+static int vxlan_fill_encap(const struct net_device *dev,
+			    struct sk_buff *skb, int encap_len,
+			    const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *encap_attr;
+
+	encap_attr = nla_nest_start(skb, RTA_ENCAP);
+	if (!encap_attr)
+		return -ENOMEM;
+
+	if (nla_put_u64(skb, RTA_TUN_ID, tun_info->key.tun_id) ||
+	    nla_put_be32(skb, RTA_TUN_DST, tun_info->key.ipv4_dst) ||
+	    nla_put_be32(skb, RTA_TUN_SRC, tun_info->key.ipv4_src) ||
+	    nla_put_u8(skb, RTA_TUN_TOS, tun_info->key.ipv4_tos) ||
+	    nla_put_u8(skb, RTA_TUN_TTL, tun_info->key.ipv4_ttl) ||
+	    nla_put_be16(skb, RTA_TUN_SPORT, tun_info->key.tp_src) ||
+	    nla_put_be16(skb, RTA_TUN_DPORT, tun_info->key.tp_dst) ||
+	    nla_put_u16(skb, RTA_TUN_FLAGS, tun_info->key.tun_flags))
+		return -ENOMEM;
+
+	nla_nest_end(skb, encap_attr);
+
+	return 0;
+}
+
+static int vxlan_match_encap(const struct net_device *dev,
+			     const struct nlattr *nla, int encap_len,
+			     const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *tb[RTA_TUN_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, RTA_TUN_MAX, nla, tunnel_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[RTA_TUN_ID] &&
+	    tun_info->key.tun_id != nla_get_u64(tb[RTA_TUN_ID]))
+		return 1;
+
+	if (tb[RTA_TUN_DST] &&
+	    tun_info->key.ipv4_dst != nla_get_be32(tb[RTA_TUN_DST]))
+		return 1;
+
+	if (tb[RTA_TUN_SRC] &&
+	    tun_info->key.ipv4_src != nla_get_be32(tb[RTA_TUN_SRC]))
+		return 1;
+
+	if (tb[RTA_TUN_TTL] &&
+	    tun_info->key.ipv4_ttl != nla_get_u8(tb[RTA_TUN_TTL]))
+		return 1;
+
+	if (tb[RTA_TUN_TOS] &&
+	    tun_info->key.ipv4_tos != nla_get_u8(tb[RTA_TUN_TOS]))
+		return 1;
+
+	if (tb[RTA_TUN_SPORT] &&
+	    tun_info->key.tp_src != nla_get_be16(tb[RTA_TUN_SPORT]))
+		return 1;
+
+	if (tb[RTA_TUN_DPORT] &&
+	    tun_info->key.tp_dst != nla_get_be16(tb[RTA_TUN_DPORT]))
+		return 1;
+
+	if (tb[RTA_TUN_FLAGS] &&
+	    tun_info->key.tun_flags != nla_get_u16(tb[RTA_TUN_FLAGS]))
+		return 1;
+
+	return 0;
+}
+
 static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.kind			= "vxlan",
 	.maxtype		= IFLA_VXLAN_MAX,
@@ -2893,6 +3093,9 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.get_size		= vxlan_get_size,
 	.fill_info		= vxlan_fill_info,
 	.get_link_net		= vxlan_get_link_net,
+	.parse_encap		= vxlan_parse_encap,
+	.fill_encap		= vxlan_fill_encap,
+	.match_encap		= vxlan_match_encap,
 };
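For illustration only, here is a rough, untested userspace sketch of how a
route carrying such a nested RTA_ENCAP might be added via RTM_NEWROUTE using
libmnl. It assumes RTA_ENCAP and the RTA_TUN_* enum above end up exported in
the uapi headers; the destination prefix, ifindex and tunnel parameters are
placeholders, not anything the patches define:

/*
 * Hypothetical userspace sketch (not part of the patches): build an
 * RTM_NEWROUTE request whose single nexthop carries a nested RTA_ENCAP
 * with the RTA_TUN_* attributes proposed above. Values are made up.
 */
#include <arpa/inet.h>
#include <linux/rtnetlink.h>
#include <libmnl/libmnl.h>
#include <time.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct rtmsg *rtm;
	struct nlattr *encap;

	nlh->nlmsg_type = RTM_NEWROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
	nlh->nlmsg_seq = time(NULL);

	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = 32;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = RTPROT_STATIC;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;

	/* route destination and the vxlan netdev the nexthop points at */
	mnl_attr_put_u32(nlh, RTA_DST, inet_addr("192.0.2.1"));
	mnl_attr_put_u32(nlh, RTA_OIF, 4);	/* ifindex of the vxlan device */

	/* nested encap, to be interpreted by vxlan_parse_encap() above */
	encap = mnl_attr_nest_start(nlh, RTA_ENCAP);
	mnl_attr_put_u64(nlh, RTA_TUN_ID, 42);
	mnl_attr_put_u32(nlh, RTA_TUN_DST, inet_addr("198.51.100.2"));
	mnl_attr_put_u8(nlh, RTA_TUN_TTL, 64);
	mnl_attr_nest_end(nlh, encap);

	/* send nlh over an AF_NETLINK/NETLINK_ROUTE socket, e.g. with
	 * mnl_socket_open()/mnl_socket_sendto(), and wait for the ack */
	return 0;
}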
> What is your plan regarding the receive side and on the matching of
> encap fields? Storing the receive parameters is what led me to
> storing them in skb_shared_info.
I have no plans for the receive side, and it wouldn't fit easily into my
approach, so you'll need to implement that separately.
Thanks,
Rob