From: Robert Shearman <rshearma@brocade.com>
To: Roopa Prabhu <roopa@cumulusnetworks.com>, <davem@davemloft.net>
Cc: <ebiederm@xmission.com>, <netdev@vger.kernel.org>
Subject: Re: [PATCH net-next 1/3] mpls: move mpls_route nexthop fields to a new nhlfe struct
Date: Wed, 12 Aug 2015 20:15:07 +0100 [thread overview]
Message-ID: <55CB9B3B.1060809@brocade.com> (raw)
In-Reply-To: <1439329548-50935-2-git-send-email-roopa@cumulusnetworks.com>
On 11/08/15 22:45, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> moves mpls_route nexthop fields to a new mpls_nhlfe
> struct. mpls_nhlfe represents a mpls nexthop label forwarding entry.
> It prepares mpls route structure for multipath support.
>
> In the process moves mpls_route structure into internal.h.
Is there a requirement for moving this and the new datastructures into
internal.h? I may have missed it, but I don't see any dependency on this
in this patch series.
> Moves some of the code from mpls_route_add into a separate mpls
> nhlfe build function. changed mpls_rt_alloc to take number of
> nexthops as argument.
>
> A mpls route can point to multiple mpls_nhlfe. This patch
> does not support multipath yet, hence the rest of the changes
> assume that a mpls route points to a single mpls_nhlfe
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
> net/mpls/af_mpls.c | 225 ++++++++++++++++++++++++++++-----------------------
> net/mpls/internal.h | 35 ++++++++
> 2 files changed, 158 insertions(+), 102 deletions(-)
>
> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
> index 8c5707d..cf86e9d 100644
> --- a/net/mpls/af_mpls.c
> +++ b/net/mpls/af_mpls.c
> @@ -21,35 +21,6 @@
> #endif
> #include "internal.h"
>
> -#define LABEL_NOT_SPECIFIED (1<<20)
> -#define MAX_NEW_LABELS 2
> -
> -/* This maximum ha length copied from the definition of struct neighbour */
> -#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
> -
> -enum mpls_payload_type {
> - MPT_UNSPEC, /* IPv4 or IPv6 */
> - MPT_IPV4 = 4,
> - MPT_IPV6 = 6,
> -
> - /* Other types not implemented:
> - * - Pseudo-wire with or without control word (RFC4385)
> - * - GAL (RFC5586)
> - */
> -};
> -
> -struct mpls_route { /* next hop label forwarding entry */
> - struct net_device __rcu *rt_dev;
> - struct rcu_head rt_rcu;
> - u32 rt_label[MAX_NEW_LABELS];
> - u8 rt_protocol; /* routing protocol that set this entry */
> - u8 rt_payload_type;
> - u8 rt_labels;
> - u8 rt_via_alen;
> - u8 rt_via_table;
> - u8 rt_via[0];
> -};
> -
> static int zero = 0;
> static int label_limit = (1 << 20) - 1;
>
...
> @@ -281,13 +254,15 @@ struct mpls_route_config {
> struct nl_info rc_nlinfo;
> };
>
> -static struct mpls_route *mpls_rt_alloc(size_t alen)
> +static struct mpls_route *mpls_rt_alloc(int num_nh)
> {
> struct mpls_route *rt;
>
> - rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
> + rt = kzalloc(sizeof(*rt) + (num_nh * sizeof(struct mpls_nhlfe)),
How about this instead:
offsetof(typeof(*rt), rt_nh[num_nh])
?
That way, you don't need to write out the type of rt_nh here.
> + GFP_KERNEL);
> if (rt)
> - rt->rt_via_alen = alen;
> + rt->rt_nhn = num_nh;
> +
> return rt;
> }
>
> @@ -322,7 +297,7 @@ static void mpls_route_update(struct net *net, unsigned index,
>
> platform_label = rtnl_dereference(net->mpls.platform_label);
> rt = rtnl_dereference(platform_label[index]);
> - if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
> + if (!dev || (rt && (rtnl_dereference(rt->rt_nh->nh_dev) == dev))) {
> rcu_assign_pointer(platform_label[index], new);
> old = rt;
> }
> @@ -406,23 +381,23 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
> #endif
>
> static struct net_device *find_outdev(struct net *net,
> - struct mpls_route_config *cfg)
> + struct mpls_nhlfe *nhlfe, int oif)
> {
> struct net_device *dev = NULL;
>
> - if (!cfg->rc_ifindex) {
> - switch (cfg->rc_via_table) {
> + if (!oif) {
> + switch (nhlfe->nh_via_table) {
> case NEIGH_ARP_TABLE:
> - dev = inet_fib_lookup_dev(net, cfg->rc_via);
> + dev = inet_fib_lookup_dev(net, nhlfe->nh_via);
> break;
> case NEIGH_ND_TABLE:
> - dev = inet6_fib_lookup_dev(net, cfg->rc_via);
> + dev = inet6_fib_lookup_dev(net, nhlfe->nh_via);
> break;
> case NEIGH_LINK_TABLE:
> break;
> }
> } else {
> - dev = dev_get_by_index(net, cfg->rc_ifindex);
> + dev = dev_get_by_index(net, oif);
> }
>
> if (!dev)
> @@ -431,15 +406,81 @@ static struct net_device *find_outdev(struct net *net,
> return dev;
> }
>
> +int mpls_nhlfe_assign_dev(struct net *net, struct mpls_nhlfe *nhlfe, int oif)
> +{
> + struct net_device *dev = NULL;
> + int err = -ENODEV;
> +
> + dev = find_outdev(net, nhlfe, oif);
> + if (IS_ERR(dev)) {
> + err = PTR_ERR(dev);
> + dev = NULL;
> + goto errout;
> + }
> +
> + /* Ensure this is a supported device */
> + err = -EINVAL;
> + if (!mpls_dev_get(dev))
> + goto errout;
> +
> + /* For now just support ethernet devices */
> + err = -EINVAL;
> + if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
> + goto errout;
Isn't this interface type check redundant with the mpls_dev_get call
just above?
> +
> + RCU_INIT_POINTER(nhlfe->nh_dev, dev);
> + dev_put(dev);
Is it safe to release the reference to dev here? Prior to these changes,
it was released after mpls_route_update is called in mpls_route_add - is
that OK without a reference?
> +
> + return 0;
> +
> +errout:
> + if (dev)
> + dev_put(dev);
> + return err;
> +}
> +
> +static int mpls_nhlfe_build(struct mpls_route_config *cfg,
> + struct mpls_nhlfe *nhlfe)
> +{
> + struct net *net = cfg->rc_nlinfo.nl_net;
> + int err = -ENOMEM;
> + int i;
> +
> + if (!nhlfe)
> + goto errout;
> +
> + err = -EINVAL;
> + /* Ensure only a supported number of labels are present */
> + if (cfg->rc_output_labels > MAX_NEW_LABELS)
> + goto errout;
> +
> + nhlfe->nh_labels = cfg->rc_output_labels;
> + for (i = 0; i < nhlfe->nh_labels; i++)
> + nhlfe->nh_label[i] = cfg->rc_output_label[i];
> +
> + nhlfe->nh_payload_type = cfg->rc_payload_type;
> + nhlfe->nh_via_table = cfg->rc_via_table;
> + memcpy(nhlfe->nh_via, cfg->rc_via, cfg->rc_via_alen);
> + nhlfe->nh_via_alen = cfg->rc_via_alen;
Shouldn't this check that was removed from mpls_route_add be added here:
- if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
- (dev->addr_len != cfg->rc_via_alen))
- goto errout;
?
> +
> + err = mpls_nhlfe_assign_dev(net, nhlfe, cfg->rc_ifindex);
> + if (err)
> + goto errout;
> +
> + return 0;
> +
> +errout:
> + return err;
> +}
> +
> static int mpls_route_add(struct mpls_route_config *cfg)
> {
> struct mpls_route __rcu **platform_label;
> struct net *net = cfg->rc_nlinfo.nl_net;
> - struct net_device *dev = NULL;
> struct mpls_route *rt, *old;
> - unsigned index;
> - int i;
> int err = -EINVAL;
> + unsigned index;
> + int nhs = 1; /* default to one nexthop */
>
> index = cfg->rc_label;
>
> @@ -457,27 +498,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
> if (index >= net->mpls.platform_labels)
> goto errout;
>
> - /* Ensure only a supported number of labels are present */
> - if (cfg->rc_output_labels > MAX_NEW_LABELS)
> - goto errout;
> -
> - dev = find_outdev(net, cfg);
> - if (IS_ERR(dev)) {
> - err = PTR_ERR(dev);
> - dev = NULL;
> - goto errout;
> - }
> -
> - /* Ensure this is a supported device */
> - err = -EINVAL;
> - if (!mpls_dev_get(dev))
> - goto errout;
> -
> - err = -EINVAL;
> - if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
> - (dev->addr_len != cfg->rc_via_alen))
> - goto errout;
> -
> /* Append makes no sense with mpls */
> err = -EOPNOTSUPP;
> if (cfg->rc_nlflags & NLM_F_APPEND)
...
> diff --git a/net/mpls/internal.h b/net/mpls/internal.h
> index 2681a4b..f05e2e8 100644
> --- a/net/mpls/internal.h
> +++ b/net/mpls/internal.h
...
> @@ -21,6 +32,30 @@ struct mpls_dev {
>
> struct sk_buff;
>
> +#define LABEL_NOT_SPECIFIED (1 << 20)
> +#define MAX_NEW_LABELS 2
> +
> +/* This maximum ha length copied from the definition of struct neighbour */
> +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
> +
> +struct mpls_nhlfe { /* next hop label forwarding entry */
Can we just call this mpls_nh? IMHO, NHLFE is a bit of MPLS-specific
jargon that doesn't really add anything and may impact readability for
anybody familiar with other protocols, but not with the MPLS RFCs.
> + struct net_device __rcu *nh_dev;
> + u32 nh_label[MAX_NEW_LABELS];
> + unsigned int nh_flags;
Is it worth adding the nh_flags field? Unless I missed something it
isn't used in this patch and isn't written in any subsequent patches.
> + u8 nh_payload_type;
nh_payload_type isn't really an attribute of the nexthop, but of the
route - it's signally the type of traffic that is using that route.
> + u8 nh_labels;
> + u8 nh_via_alen;
> + u8 nh_via_table;
> + u8 nh_via[MAX_VIA_ALEN];
MAX_VIA_ALEN is 32 bytes, so there is now a 28 byte overhead per nexthop
for the common case of using an IPv4 nexthop. With a large number of
routes/nexthops, this will become noticeable. If we avoided using a
fixed allocation, it would mitigate this.
> +};
> +
> +struct mpls_route { /* next hop label forwarding entry */
Is it still the NHLFE? :-)
> + struct rcu_head rt_rcu;
> + u8 rt_protocol;
> + int rt_nhn;
> + struct mpls_nhlfe rt_nh[0];
> +};
> +
> static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
> {
> return (struct mpls_shim_hdr *)skb_network_header(skb);
>
Thanks,
Rob
next prev parent reply other threads:[~2015-08-12 19:15 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-11 21:45 [PATCH net-next 1/3] mpls: move mpls_route nexthop fields to a new nhlfe struct Roopa Prabhu
2015-08-12 19:15 ` Robert Shearman [this message]
2015-08-13 3:16 ` roopa
2015-08-13 14:01 ` Robert Shearman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55CB9B3B.1060809@brocade.com \
--to=rshearma@brocade.com \
--cc=davem@davemloft.net \
--cc=ebiederm@xmission.com \
--cc=netdev@vger.kernel.org \
--cc=roopa@cumulusnetworks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.