Re: [PATCH net-next 1/3] mpls: move mpls_route nexthop fields to a new nhlfe struct

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Robert Shearman <rshearma@brocade.com>
To: Roopa Prabhu <roopa@cumulusnetworks.com>, <davem@davemloft.net>
Cc: <ebiederm@xmission.com>, <netdev@vger.kernel.org>
Subject: Re: [PATCH net-next 1/3] mpls: move mpls_route nexthop fields to a new nhlfe struct
Date: Wed, 12 Aug 2015 20:15:07 +0100	[thread overview]
Message-ID: <55CB9B3B.1060809@brocade.com> (raw)
In-Reply-To: <1439329548-50935-2-git-send-email-roopa@cumulusnetworks.com>

On 11/08/15 22:45, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> moves mpls_route nexthop fields to a new mpls_nhlfe
> struct. mpls_nhlfe represents a mpls nexthop label forwarding entry.
> It prepares mpls route structure for multipath support.
>
> In the process moves mpls_route structure into internal.h.

Is there a requirement for moving this and the new datastructures into 
internal.h? I may have missed it, but I don't see any dependency on this 
in this patch series.

> Moves some of the code from mpls_route_add into a separate mpls
> nhlfe build function. changed mpls_rt_alloc to take number of
> nexthops as argument.
>
> A mpls route can point to multiple mpls_nhlfe. This patch
> does not support multipath yet, hence the rest of the changes
> assume that a mpls route points to a single mpls_nhlfe
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>   net/mpls/af_mpls.c  |  225 ++++++++++++++++++++++++++++-----------------------
>   net/mpls/internal.h |   35 ++++++++
>   2 files changed, 158 insertions(+), 102 deletions(-)
>
> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
> index 8c5707d..cf86e9d 100644
> --- a/net/mpls/af_mpls.c
> +++ b/net/mpls/af_mpls.c
> @@ -21,35 +21,6 @@
>   #endif
>   #include "internal.h"
>
> -#define LABEL_NOT_SPECIFIED (1<<20)
> -#define MAX_NEW_LABELS 2
> -
> -/* This maximum ha length copied from the definition of struct neighbour */
> -#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
> -
> -enum mpls_payload_type {
> -	MPT_UNSPEC, /* IPv4 or IPv6 */
> -	MPT_IPV4 = 4,
> -	MPT_IPV6 = 6,
> -
> -	/* Other types not implemented:
> -	 *  - Pseudo-wire with or without control word (RFC4385)
> -	 *  - GAL (RFC5586)
> -	 */
> -};
> -
> -struct mpls_route { /* next hop label forwarding entry */
> -	struct net_device __rcu *rt_dev;
> -	struct rcu_head		rt_rcu;
> -	u32			rt_label[MAX_NEW_LABELS];
> -	u8			rt_protocol; /* routing protocol that set this entry */
> -	u8                      rt_payload_type;
> -	u8			rt_labels;
> -	u8			rt_via_alen;
> -	u8			rt_via_table;
> -	u8			rt_via[0];
> -};
> -
>   static int zero = 0;
>   static int label_limit = (1 << 20) - 1;
>
...
> @@ -281,13 +254,15 @@ struct mpls_route_config {
>   	struct nl_info		rc_nlinfo;
>   };
>
> -static struct mpls_route *mpls_rt_alloc(size_t alen)
> +static struct mpls_route *mpls_rt_alloc(int num_nh)
>   {
>   	struct mpls_route *rt;
>
> -	rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
> +	rt = kzalloc(sizeof(*rt) + (num_nh * sizeof(struct mpls_nhlfe)),

How about this instead:
   offsetof(typeof(*rt), rt_nh[num_nh])
?

That way, you don't need to write out the type of rt_nh here.

> +		     GFP_KERNEL);
>   	if (rt)
> -		rt->rt_via_alen = alen;
> +		rt->rt_nhn = num_nh;
> +
>   	return rt;
>   }
>
> @@ -322,7 +297,7 @@ static void mpls_route_update(struct net *net, unsigned index,
>
>   	platform_label = rtnl_dereference(net->mpls.platform_label);
>   	rt = rtnl_dereference(platform_label[index]);
> -	if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
> +	if (!dev || (rt && (rtnl_dereference(rt->rt_nh->nh_dev) == dev))) {
>   		rcu_assign_pointer(platform_label[index], new);
>   		old = rt;
>   	}
> @@ -406,23 +381,23 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
>   #endif
>
>   static struct net_device *find_outdev(struct net *net,
> -				      struct mpls_route_config *cfg)
> +				      struct mpls_nhlfe *nhlfe, int oif)
>   {
>   	struct net_device *dev = NULL;
>
> -	if (!cfg->rc_ifindex) {
> -		switch (cfg->rc_via_table) {
> +	if (!oif) {
> +		switch (nhlfe->nh_via_table) {
>   		case NEIGH_ARP_TABLE:
> -			dev = inet_fib_lookup_dev(net, cfg->rc_via);
> +			dev = inet_fib_lookup_dev(net, nhlfe->nh_via);
>   			break;
>   		case NEIGH_ND_TABLE:
> -			dev = inet6_fib_lookup_dev(net, cfg->rc_via);
> +			dev = inet6_fib_lookup_dev(net, nhlfe->nh_via);
>   			break;
>   		case NEIGH_LINK_TABLE:
>   			break;
>   		}
>   	} else {
> -		dev = dev_get_by_index(net, cfg->rc_ifindex);
> +		dev = dev_get_by_index(net, oif);
>   	}
>
>   	if (!dev)
> @@ -431,15 +406,81 @@ static struct net_device *find_outdev(struct net *net,
>   	return dev;
>   }
>
> +int mpls_nhlfe_assign_dev(struct net *net, struct mpls_nhlfe *nhlfe, int oif)
> +{
> +	struct net_device *dev = NULL;
> +	int err = -ENODEV;
> +
> +	dev = find_outdev(net, nhlfe, oif);
> +	if (IS_ERR(dev)) {
> +		err = PTR_ERR(dev);
> +		dev = NULL;
> +		goto errout;
> +	}
> +
> +	/* Ensure this is a supported device */
> +	err = -EINVAL;
> +	if (!mpls_dev_get(dev))
> +		goto errout;
> +
> +	/* For now just support ethernet devices */
> +	err = -EINVAL;
> +	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
> +		goto errout;

Isn't this interface type check redundant with the mpls_dev_get call 
just above?

> +
> +	RCU_INIT_POINTER(nhlfe->nh_dev, dev);
> +	dev_put(dev);

Is it safe to release the reference to dev here? Prior to these changes, 
it was released after mpls_route_update is called in mpls_route_add - is 
that OK without a reference?

> +
> +	return 0;
> +
> +errout:
> +	if (dev)
> +		dev_put(dev);
> +	return err;
> +}
> +
> +static int mpls_nhlfe_build(struct mpls_route_config *cfg,
> +			    struct mpls_nhlfe *nhlfe)
> +{
> +	struct net *net = cfg->rc_nlinfo.nl_net;
> +	int err = -ENOMEM;
> +	int i;
> +
> +	if (!nhlfe)
> +		goto errout;
> +
> +	err = -EINVAL;
> +	/* Ensure only a supported number of labels are present */
> +	if (cfg->rc_output_labels > MAX_NEW_LABELS)
> +		goto errout;
> +
> +	nhlfe->nh_labels = cfg->rc_output_labels;
> +	for (i = 0; i < nhlfe->nh_labels; i++)
> +		nhlfe->nh_label[i] = cfg->rc_output_label[i];
> +
> +	nhlfe->nh_payload_type = cfg->rc_payload_type;
> +	nhlfe->nh_via_table = cfg->rc_via_table;
> +	memcpy(nhlfe->nh_via, cfg->rc_via, cfg->rc_via_alen);
> +	nhlfe->nh_via_alen = cfg->rc_via_alen;

Shouldn't this check that was removed from mpls_route_add be added here:

-	if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
-	    (dev->addr_len != cfg->rc_via_alen))
-		goto errout;

?

> +
> +	err = mpls_nhlfe_assign_dev(net, nhlfe, cfg->rc_ifindex);
> +	if (err)
> +		goto errout;
> +
> +	return 0;
> +
> +errout:
> +	return err;
> +}
> +
>   static int mpls_route_add(struct mpls_route_config *cfg)
>   {
>   	struct mpls_route __rcu **platform_label;
>   	struct net *net = cfg->rc_nlinfo.nl_net;
> -	struct net_device *dev = NULL;
>   	struct mpls_route *rt, *old;
> -	unsigned index;
> -	int i;
>   	int err = -EINVAL;
> +	unsigned index;
> +	int nhs = 1; /* default to one nexthop */
>
>   	index = cfg->rc_label;
>
> @@ -457,27 +498,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
>   	if (index >= net->mpls.platform_labels)
>   		goto errout;
>
> -	/* Ensure only a supported number of labels are present */
> -	if (cfg->rc_output_labels > MAX_NEW_LABELS)
> -		goto errout;
> -
> -	dev = find_outdev(net, cfg);
> -	if (IS_ERR(dev)) {
> -		err = PTR_ERR(dev);
> -		dev = NULL;
> -		goto errout;
> -	}
> -
> -	/* Ensure this is a supported device */
> -	err = -EINVAL;
> -	if (!mpls_dev_get(dev))
> -		goto errout;
> -
> -	err = -EINVAL;
> -	if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
> -	    (dev->addr_len != cfg->rc_via_alen))
> -		goto errout;
> -
>   	/* Append makes no sense with mpls */
>   	err = -EOPNOTSUPP;
>   	if (cfg->rc_nlflags & NLM_F_APPEND)
...
> diff --git a/net/mpls/internal.h b/net/mpls/internal.h
> index 2681a4b..f05e2e8 100644
> --- a/net/mpls/internal.h
> +++ b/net/mpls/internal.h
...
> @@ -21,6 +32,30 @@ struct mpls_dev {
>
>   struct sk_buff;
>
> +#define LABEL_NOT_SPECIFIED (1 << 20)
> +#define MAX_NEW_LABELS 2
> +
> +/* This maximum ha length copied from the definition of struct neighbour */
> +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
> +
> +struct mpls_nhlfe { /* next hop label forwarding entry */

Can we just call this mpls_nh? IMHO, NHLFE is a bit of MPLS-specific 
jargon that doesn't really add anything and may impact readability for 
anybody familiar with other protocols, but not with the MPLS RFCs.

> +	struct net_device __rcu *nh_dev;
> +	u32			nh_label[MAX_NEW_LABELS];
> +	unsigned int		nh_flags;

Is it worth adding the nh_flags field? Unless I missed something it 
isn't used in this patch and isn't written in any subsequent patches.

> +	u8			nh_payload_type;

nh_payload_type isn't really an attribute of the nexthop, but of the 
route - it's signally the type of traffic that is using that route.

> +	u8			nh_labels;
> +	u8			nh_via_alen;
> +	u8			nh_via_table;
> +	u8			nh_via[MAX_VIA_ALEN];

MAX_VIA_ALEN is 32 bytes, so there is now a 28 byte overhead per nexthop 
for the common case of using an IPv4 nexthop. With a large number of 
routes/nexthops, this will become noticeable. If we avoided using a 
fixed allocation, it would mitigate this.

> +};
> +
> +struct mpls_route { /* next hop label forwarding entry */

Is it still the NHLFE? :-)

> +	struct rcu_head		rt_rcu;
> +	u8			rt_protocol;
> +	int			rt_nhn;
> +	struct mpls_nhlfe	rt_nh[0];
> +};
> +
>   static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
>   {
>   	return (struct mpls_shim_hdr *)skb_network_header(skb);
>

Thanks,
Rob

next prev parent reply	other threads:[~2015-08-12 19:15 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-11 21:45 [PATCH net-next 1/3] mpls: move mpls_route nexthop fields to a new nhlfe struct Roopa Prabhu
2015-08-12 19:15 ` Robert Shearman [this message]
2015-08-13  3:16   ` roopa
2015-08-13 14:01     ` Robert Shearman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55CB9B3B.1060809@brocade.com \
    --to=rshearma@brocade.com \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=netdev@vger.kernel.org \
    --cc=roopa@cumulusnetworks.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.