Re: [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Roopa Prabhu <roopa@cumulusnetworks.com>
To: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Cc: netdev@vger.kernel.org, davem@davemloft.net,
	stephen@networkplumber.org, tgraf@suug.ch,
	hannes@stressinduktion.org, jbenc@redhat.com, pshelar@ovn.org,
	dsa@cumulusnetworks.com, hadi@mojatatu.com
Subject: Re: [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly
Date: Sun, 22 Jan 2017 07:18:09 -0800	[thread overview]
Message-ID: <5884CD31.5070708@cumulusnetworks.com> (raw)
In-Reply-To: <f4c86e0a-472b-700c-98fe-f5da2eea35e6@cumulusnetworks.com>

On 1/22/17, 3:40 AM, Nikolay Aleksandrov wrote:
> On 21/01/17 06:46, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch series makes vxlan COLLECT_METADATA mode bridge
>> and layer2 network friendly. Vxlan COLLECT_METADATA mode today
>> solves the per-vni netdev scalability problem in l3 networks.
>> When a vxlan collect metadata device participates in bridging
>> vlan to vn-segments, it can only get the vlan mapped vni in
>> the xmit tunnel dst metadata. It will need the vxlan driver to
>> continue to learn, hold forwarding state and remote destination
>> information similar to how it already does for non COLLECT_METADATA
>> vxlan netdevices today.
>>
>> Changes introduced by this patch:
>>     - allow learning and forwarding database state to vxlan netdev in
>>       COLLECT_METADATA mode. Current behaviour is not changed
>>       by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
>>       to support the new bridge friendly mode.
>>     - A single fdb table hashed by (mac, vni) to allow fdb entries with
>>       multiple vnis in the same fdb table
>>     - rx path already has the vni
>>     - tx path expects a vni in the packet with dst_metadata
>>     - prior to this series, fdb remote_dsts carried remote vni and
>>       the vxlan device carrying the fdb table represented the
>>       source vni. With the vxlan device now representing multiple vnis,
>>       this patch adds a src vni attribute to the fdb entry. The remote
>>       vni already uses NDA_VNI attribute. This patch introduces
>>       NDA_SRC_VNI netlink attribute to represent the src vni in a multi
>>       vni fdb table.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
> [snip]
>> @@ -2173,23 +2221,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	bool did_rsc = false;
>>  	struct vxlan_rdst *rdst, *fdst = NULL;
>>  	struct vxlan_fdb *f;
>> +	__be32 vni = 0;
>>  
>>  	info = skb_tunnel_info(skb);
>>  
>>  	skb_reset_mac_header(skb);
>>  
>>  	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
>> -		if (info && info->mode & IP_TUNNEL_INFO_TX)
>> -			vxlan_xmit_one(skb, dev, NULL, false);
>> -		else
>> -			kfree_skb(skb);
>> -		return NETDEV_TX_OK;
>> +		if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
>> +		    info->mode & IP_TUNNEL_INFO_TX) {
> nit: parentheses around the IP_TUNNEL_INFO_TX check
>
>> +			vni = tunnel_id_to_key32(info->key.tun_id);
>> +		} else {
>> +			if (info && info->mode & IP_TUNNEL_INFO_TX)
> nit: parentheses around the IP_TUNNEL_INFO_TX check

ack
>> +				vxlan_xmit_one(skb, dev, vni, NULL, false);
>> +			else
>> +				kfree_skb(skb);
>> +			return NETDEV_TX_OK;
>> +		}
>>  	}
>>  
>>  	if (vxlan->flags & VXLAN_F_PROXY) {
>>  		eth = eth_hdr(skb);
>>  		if (ntohs(eth->h_proto) == ETH_P_ARP)
>> -			return arp_reduce(dev, skb);
>> +			return arp_reduce(dev, skb, vni);
>>  #if IS_ENABLED(CONFIG_IPV6)
>>  		else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
>>  			 pskb_may_pull(skb, sizeof(struct ipv6hdr)
>> @@ -2200,13 +2254,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  				msg = (struct nd_msg *)skb_transport_header(skb);
>>  				if (msg->icmph.icmp6_code == 0 &&
>>  				    msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
>> -					return neigh_reduce(dev, skb);
>> +					return neigh_reduce(dev, skb, vni);
>>  		}
>>  #endif
>>  	}
>>  
>>  	eth = eth_hdr(skb);
>> -	f = vxlan_find_mac(vxlan, eth->h_dest);
>> +	f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>>  	did_rsc = false;
>>  
>>  	if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
>> @@ -2214,11 +2268,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	     ntohs(eth->h_proto) == ETH_P_IPV6)) {
>>  		did_rsc = route_shortcircuit(dev, skb);
>>  		if (did_rsc)
>> -			f = vxlan_find_mac(vxlan, eth->h_dest);
>> +			f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>>  	}
>>  
>>  	if (f == NULL) {
>> -		f = vxlan_find_mac(vxlan, all_zeros_mac);
>> +		f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
>>  		if (f == NULL) {
>>  			if ((vxlan->flags & VXLAN_F_L2MISS) &&
>>  			    !is_multicast_ether_addr(eth->h_dest))
>> @@ -2239,11 +2293,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
>>  		}
>>  		skb1 = skb_clone(skb, GFP_ATOMIC);
>>  		if (skb1)
>> -			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
>> +			vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
>>  	}
>>  
>>  	if (fdst)
>> -		vxlan_xmit_one(skb, dev, fdst, did_rsc);
>> +		vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
>>  	else
>>  		kfree_skb(skb);
>>  	return NETDEV_TX_OK;
>> @@ -2307,12 +2361,12 @@ static int vxlan_init(struct net_device *dev)
>>  	return 0;
>>  }
>>  
>> -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
>> +static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
>>  {
>>  	struct vxlan_fdb *f;
>>  
>>  	spin_lock_bh(&vxlan->hash_lock);
>> -	f = __vxlan_find_mac(vxlan, all_zeros_mac);
>> +	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
>>  	if (f)
>>  		vxlan_fdb_destroy(vxlan, f);
>>  	spin_unlock_bh(&vxlan->hash_lock);
>> @@ -2322,7 +2376,7 @@ static void vxlan_uninit(struct net_device *dev)
>>  {
>>  	struct vxlan_dev *vxlan = netdev_priv(dev);
>>  
>> -	vxlan_fdb_delete_default(vxlan);
>> +	vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>>  
>>  	free_percpu(dev->tstats);
>>  }
>> @@ -2536,6 +2590,8 @@ static void vxlan_setup(struct net_device *dev)
>>  	dev->vlan_features = dev->features;
>>  	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
>>  	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
>> +	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
>> +	dev->features |= dev->hw_features;
>>  	netif_keep_dst(dev);
>>  	dev->priv_flags |= IFF_NO_QUEUE;
>>  
>> @@ -2921,6 +2977,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>>  				       NLM_F_EXCL|NLM_F_CREATE,
>>  				       vxlan->cfg.dst_port,
>>  				       vxlan->default_dst.remote_vni,
>> +				       vxlan->default_dst.remote_vni,
>>  				       vxlan->default_dst.remote_ifindex,
>>  				       NTF_SELF);
>>  		if (err)
>> @@ -2929,7 +2986,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>>  
>>  	err = register_netdevice(dev);
>>  	if (err) {
>> -		vxlan_fdb_delete_default(vxlan);
>> +		vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>>  		return err;
>>  	}
>>  
>> @@ -3023,19 +3080,19 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>>  
>>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
>>  
>>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
>>  		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
>>  
>>  	if (data[IFLA_VXLAN_REMCSUM_TX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
>>  		conf.flags |= VXLAN_F_REMCSUM_TX;
>>  
>>  	if (data[IFLA_VXLAN_REMCSUM_RX] &&
>> -	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
>> +	    !nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
>>  		conf.flags |= VXLAN_F_REMCSUM_RX;
> Aren't these going to break user-space ? 

Correct... ignore these. Not intentional. These were from an incorrect merge with an earlier changelink patch I had.
Did not realize these had crept in.

thanks.

next prev parent reply	other threads:[~2017-01-22 15:18 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-21  5:46 [RFC PATCH net-next 0/5] bridge: per vlan lwt and dst_metadata support Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 1/5] ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info mode Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 2/5] vxlan: make COLLECT_METADATA mode bridge friendly Roopa Prabhu
2017-01-22 11:40   ` Nikolay Aleksandrov
2017-01-22 15:18     ` Roopa Prabhu [this message]
2017-01-21  5:46 ` [RFC PATCH net-next 3/5] bridge: uapi: add per vlan tunnel info Roopa Prabhu
2017-01-21 16:59   ` Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 4/5] bridge: vlan lwt and dst_metadata netlink support Roopa Prabhu
2017-01-22 12:05   ` Nikolay Aleksandrov
2017-01-22 15:23     ` Roopa Prabhu
2017-01-23  0:22   ` Rosen, Rami
2017-01-23 15:39     ` Roopa Prabhu
2017-01-21  5:46 ` [RFC PATCH net-next 5/5] bridge: vlan lwt dst_metadata hooks in ingress and egress paths Roopa Prabhu
2017-01-22 12:15   ` Nikolay Aleksandrov
2017-01-22 15:27     ` Roopa Prabhu
2017-01-23  8:08 ` [RFC PATCH net-next 0/5] bridge: per vlan lwt and dst_metadata support Jiri Pirko
2017-01-23  8:51   ` Jiri Benc
2017-01-23 16:13     ` Roopa Prabhu
2017-01-23 16:24       ` Jiri Benc
2017-01-24  0:00         ` Roopa Prabhu
     [not found]       ` <CAJ3xEMiC5xJ+rex8xMnyuGj5QKj+sYA9A6JjOM0xQaZraFSHig@mail.gmail.com>
2017-01-24  0:09         ` Roopa Prabhu
2017-01-24 15:47 ` Stephen Hemminger
2017-01-25 17:08   ` Roopa Prabhu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5884CD31.5070708@cumulusnetworks.com \
    --to=roopa@cumulusnetworks.com \
    --cc=davem@davemloft.net \
    --cc=dsa@cumulusnetworks.com \
    --cc=hadi@mojatatu.com \
    --cc=hannes@stressinduktion.org \
    --cc=jbenc@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=nikolay@cumulusnetworks.com \
    --cc=pshelar@ovn.org \
    --cc=stephen@networkplumber.org \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.