* [RFC v2 1/3] udp_tunnel: add config option to bind to a device
2018-11-19 17:19 [RFC v2 0/3] Add VRF support for VXLAN underlay Alexis Bauvin
@ 2018-11-19 17:19 ` Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 2/3] vxlan: add support for underlay in non-default VRF Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 3/3] vxlan: handle underlay VRF changes Alexis Bauvin
2 siblings, 0 replies; 4+ messages in thread
From: Alexis Bauvin @ 2018-11-19 17:19 UTC (permalink / raw)
To: dsa, roopa; +Cc: netdev, abauvin, akherbouche
UDP tunnel sockets are always opened unbound to a specific device. This
patch allows the socket to be bound to a specific device, which
incidentally makes UDP tunnels VRF-aware when binding to an l3mdev.
Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
Reviewed-by: Amine Kherbouche <akherbouche@scaleway.com>
Tested-by: Amine Kherbouche <akherbouche@scaleway.com>
---
include/net/udp_tunnel.h | 1 +
net/ipv4/udp_tunnel.c | 10 ++++++++++
net/ipv6/ip6_udp_tunnel.c | 9 +++++++++
3 files changed, 20 insertions(+)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index fe680ab6b15a..9f7970d010f9 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -30,6 +30,7 @@ struct udp_port_cfg {
__be16 local_udp_port;
__be16 peer_udp_port;
+ int bind_ifindex;
unsigned int use_udp_checksums:1,
use_udp6_tx_checksums:1,
use_udp6_rx_checksums:1,
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..dc68e15a4f72 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -20,6 +20,16 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
+	if (cfg->bind_ifindex) {
+		struct net_device *dev;
+
+		/* bind_ifindex is supplied by the caller and may not match
+		 * any device in @net, so the lookup result must be checked.
+		 * dev_get_by_index() returns the device with a reference
+		 * held, keeping it valid while its name is read.
+		 */
+		dev = dev_get_by_index(net, cfg->bind_ifindex);
+		if (!dev) {
+			err = -ENODEV;
+			goto error;
+		}
+
+		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+					dev->name, strlen(dev->name) + 1);
+		dev_put(dev);
+		if (err < 0)
+			goto error;
+	}
+
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index b283f293ee4a..fc3811ef8787 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -31,6 +31,15 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
}
+	if (cfg->bind_ifindex) {
+		struct net_device *dev;
+
+		/* bind_ifindex is supplied by the caller and may not match
+		 * any device in @net, so the lookup result must be checked.
+		 * dev_get_by_index() returns the device with a reference
+		 * held, keeping it valid while its name is read.
+		 */
+		dev = dev_get_by_index(net, cfg->bind_ifindex);
+		if (!dev) {
+			err = -ENODEV;
+			goto error;
+		}
+
+		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+					dev->name, strlen(dev->name) + 1);
+		dev_put(dev);
+		if (err < 0)
+			goto error;
+	}
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
--
^ permalink raw reply related [flat|nested] 4+ messages in thread* [RFC v2 2/3] vxlan: add support for underlay in non-default VRF
2018-11-19 17:19 [RFC v2 0/3] Add VRF support for VXLAN underlay Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 1/3] udp_tunnel: add config option to bind to a device Alexis Bauvin
@ 2018-11-19 17:19 ` Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 3/3] vxlan: handle underlay VRF changes Alexis Bauvin
2 siblings, 0 replies; 4+ messages in thread
From: Alexis Bauvin @ 2018-11-19 17:19 UTC (permalink / raw)
To: dsa, roopa; +Cc: netdev, abauvin, akherbouche
Creating a VXLAN device with its underlay in a non-default VRF makes
egress route lookups fail or return incorrect results, since they are
resolved in the default VRF, and makes ingress fail because the socket
listens in the default VRF.
This patch binds the underlying UDP tunnel socket to the l3mdev of the
lower device of the VXLAN device. This will listen in the proper VRF and
output traffic from said l3mdev, matching l3mdev routing rules and
looking up the correct routing table.
When the VXLAN device does not have a lower device, or the lower device
is in the default VRF, the socket will not be bound to any interface,
keeping the previous behaviour.
The underlay l3mdev is deduced from the VXLAN lower device
(IFLA_VXLAN_LINK).
The l3mdev_master_upper_ifindex_by_index function has been added to
l3mdev. Its goal is to fetch the effective l3mdev of an interface which
is not a direct slave of said l3mdev. It handles the following example,
properly resolving the l3mdev of eth0 to vrf-blue:
+----------+ +---------+
| | | |
| vrf-blue | | vrf-red |
| | | |
+----+-----+ +----+----+
| |
| |
+----+-----+ +----+----+
| | | |
| br-blue | | br-red |
| | | |
+----+-----+ +---+-+---+
| | |
| +-----+ +-----+
| | |
+----+-----+ +------+----+ +----+----+
| | lower device | | | |
| eth0 | <- - - - - - - | vxlan-red | | tap-red | (... more taps)
| | | | | |
+----------+ +-----------+ +---------+
Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
Reviewed-by: Amine Kherbouche <akherbouche@scaleway.com>
Tested-by: Amine Kherbouche <akherbouche@scaleway.com>
---
drivers/net/vxlan.c | 32 ++++++++++++++++++++++++--------
include/net/l3mdev.h | 21 +++++++++++++++++++++
net/l3mdev/l3mdev.c | 18 ++++++++++++++++++
3 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 27bd586b94b0..a3de08122269 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
* and enabled unshareable flags.
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct vxlan_sock *vs;
@@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
vxlan_get_sk_family(vs) == family &&
- vs->flags == flags)
+ vs->flags == flags &&
+ vs->sock->sk->sk_bound_dev_if == ifindex)
return vs;
}
return NULL;
@@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, family, port, flags);
+ vs = vxlan_find_sock(net, family, port, flags, ifindex);
if (!vs)
return NULL;
@@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
__be16 df = 0;
+ if (!ifindex)
+ ifindex = sock4->sock->sk->sk_bound_dev_if;
+
rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
dst->sin.sin_addr.s_addr,
&local_ip.sin.sin_addr.s_addr,
@@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+ if (!ifindex)
+ ifindex = sock6->sock->sk->sk_bound_dev_if;
+
ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
label, &dst->sin6.sin6_addr,
&local_ip.sin6.sin6_addr,
@@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
};
static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -2831,6 +2838,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
}
udp_conf.local_udp_port = port;
+ udp_conf.bind_ifindex = ifindex;
/* Open UDP socket */
err = udp_sock_create(net, &udp_conf, &sock);
@@ -2842,7 +2850,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags,
+ int ifindex)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
@@ -2857,7 +2866,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- sock = vxlan_create_sock(net, ipv6, port, flags);
+ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
if (IS_ERR(sock)) {
kfree(vs);
return ERR_CAST(sock);
@@ -2894,11 +2903,17 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
+ int l3mdev_index;
+
+ l3mdev_index =
+ l3mdev_master_upper_ifindex_by_index(vxlan->net,
+ vxlan->cfg.remote_ifindex);
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
return -EBUSY;
@@ -2907,7 +2922,8 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 3832099289c5..2c02bf003b21 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
return master;
}
+/* RCU flavour of the lookup below; see l3mdev_master_upper_ifindex_by_index(). */
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);
+
+/* Resolve the L3 master device above @ifindex in @net.
+ *
+ * Convenience wrapper that takes and drops the RCU read lock around
+ * l3mdev_master_upper_ifindex_by_index_rcu() and hands back its result
+ * unchanged.
+ */
+static inline
+int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
+{
+	int master_ifindex;
+
+	rcu_read_lock();
+	master_ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
+	rcu_read_unlock();
+
+	return master_ifindex;
+}
+
u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
static inline u32 l3mdev_fib_table(const struct net_device *dev)
@@ -207,6 +218,16 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
return 0;
}
+/* Stub variants: always report 0, i.e. "no L3 master device".
+ * NOTE(review): presumably the !CONFIG_NET_L3_MASTER_DEV branch of this
+ * header - confirm against the surrounding #if.
+ *
+ * They must be "static inline" like the other stubs in this header; a
+ * plain "static" function defined in a header is instantiated in every
+ * file that includes it and triggers unused-function warnings.
+ */
+static inline
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+{
+	return 0;
+}
+static inline
+int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
+{
+	return 0;
+}
+
static inline
struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
{
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8da86ceca33d..309dee76724e 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -46,6 +46,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
}
EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
+/**
+ *	l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master
+ *						   device
+ *	@net: network namespace for device index lookup
+ *	@ifindex: targeted interface
+ *
+ *	Starting at the device identified by @ifindex, follow the chain of
+ *	master upper devices until an L3 master device is reached. If the
+ *	starting device is itself an L3 master, its own index is returned.
+ *
+ *	The device lookup uses dev_get_by_index_rcu(), so the caller must be
+ *	inside an RCU read-side critical section.
+ *
+ *	Return: ifindex of the L3 master device, or 0 when @ifindex is not
+ *	found in @net or no L3 master sits above it.
+ */
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	/* Use the RCU variant of the master lookup: this function runs
+	 * under rcu_read_lock() only, while netdev_master_upper_dev_get()
+	 * expects the RTNL lock to be held.
+	 */
+	while (dev && !netif_is_l3_master(dev))
+		dev = netdev_master_upper_dev_get_rcu(dev);
+
+	return dev ? dev->ifindex : 0;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
+
/**
* l3mdev_fib_table - get FIB table id associated with an L3
* master interface
--
^ permalink raw reply related [flat|nested] 4+ messages in thread* [RFC v2 3/3] vxlan: handle underlay VRF changes
2018-11-19 17:19 [RFC v2 0/3] Add VRF support for VXLAN underlay Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 1/3] udp_tunnel: add config option to bind to a device Alexis Bauvin
2018-11-19 17:19 ` [RFC v2 2/3] vxlan: add support for underlay in non-default VRF Alexis Bauvin
@ 2018-11-19 17:19 ` Alexis Bauvin
2 siblings, 0 replies; 4+ messages in thread
From: Alexis Bauvin @ 2018-11-19 17:19 UTC (permalink / raw)
To: dsa, roopa; +Cc: netdev, abauvin, akherbouche
When the underlay VRF changes, either because the lower device itself
changed or because its VRF changed, this patch releases the current socket
of the VXLAN device and creates a new one in the right VRF. This allows
changing the underlay VRF of a VXLAN device on the fly.
Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
Reviewed-by: Amine Kherbouche <akherbouche@scaleway.com>
Tested-by: Amine Kherbouche <akherbouche@scaleway.com>
---
drivers/net/vxlan.c | 94 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 94 insertions(+)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a3de08122269..13ed9569ec79 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -208,6 +208,18 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}
+/* Tell whether @dev is @chain itself or one of the master devices stacked
+ * above it. Devices are compared by ifindex.
+ *
+ * Returns 1 on a match, 0 otherwise (including when @chain is NULL).
+ */
+static int vxlan_is_in_l3mdev_chain(struct net_device *chain,
+				    struct net_device *dev)
+{
+	struct net_device *cur;
+
+	/* Climb one master at a time until the top of the stack. */
+	for (cur = chain; cur; cur = netdev_master_upper_dev_get(cur)) {
+		if (cur->ifindex == dev->ifindex)
+			return 1;
+	}
+
+	return 0;
+}
+
/* Find VXLAN socket based on network namespace, address family and UDP port
* and enabled unshareable flags.
*/
@@ -3720,6 +3732,33 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
}
EXPORT_SYMBOL_GPL(vxlan_dev_create);
+/* Release the socket(s) currently used by @vxlan and add new ones,
+ * leaving and re-joining the multicast group when the default remote
+ * address is multicast.
+ *
+ * Returns 0 on success or a negative error code. On failure the device
+ * may be left with its socket released.
+ */
+static int vxlan_reopen(struct vxlan_net *vn, struct vxlan_dev *vxlan)
+{
+ int ret = 0;
+
+ /* Leave the group only when vxlan_group_used() reports it unused
+ * (presumably: no other VXLAN device still needs it - confirm).
+ */
+ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
+ !vxlan_group_used(vn, vxlan))
+ ret = vxlan_igmp_leave(vxlan);
+ /* The socket is released even if the leave failed; the leave error
+ * is only reported afterwards.
+ */
+ vxlan_sock_release(vxlan);
+
+ if (ret < 0)
+ return ret;
+
+ ret = vxlan_sock_add(vxlan);
+ if (ret < 0)
+ return ret;
+
+ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
+ ret = vxlan_igmp_join(vxlan);
+ /* -EADDRINUSE from the join is treated as success
+ * (presumably the group is already joined - confirm).
+ */
+ if (ret == -EADDRINUSE)
+ ret = 0;
+ /* Any other join failure drops the socket that was just added. */
+ if (ret)
+ vxlan_sock_release(vxlan);
+ }
+
+ return ret;
+}
+
static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
struct net_device *dev)
{
@@ -3742,6 +3781,55 @@ static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
unregister_netdevice_many(&list_kill);
}
+/* Reopen the socket of every VXLAN device in @vn whose lower device is
+ * @dev or has @dev somewhere in its chain of master devices. A failed
+ * reopen is logged against the VXLAN device and the walk continues.
+ */
+static void vxlan_handle_change_upper(struct vxlan_net *vn,
+				      struct net_device *dev)
+{
+	struct vxlan_dev *vxlan, *tmp;
+
+	list_for_each_entry_safe(vxlan, tmp, &vn->vxlan_list, next) {
+		struct net_device *lowerdev;
+		int ret;
+
+		lowerdev = __dev_get_by_index(vxlan->net,
+					      vxlan->cfg.remote_ifindex);
+		if (vxlan_is_in_l3mdev_chain(lowerdev, dev)) {
+			ret = vxlan_reopen(vn, vxlan);
+			if (ret < 0)
+				netdev_err(vxlan->dev,
+					   "Failed to reopen socket: %d\n",
+					   ret);
+		}
+	}
+}
+
+/* Handle a change on VXLAN device @dev: if its socket is bound to a
+ * device other than the L3 master currently above the configured lower
+ * device, reopen the socket so it ends up bound to the right one.
+ * A failed reopen is logged against the VXLAN device.
+ */
+static void vxlan_handle_change(struct vxlan_net *vn, struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_sock *sock;
+	int l3mdev_index;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
+	bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
+#else
+	bool ipv6 = false;
+#endif
+
+	l3mdev_index =
+		l3mdev_master_upper_ifindex_by_index(vxlan->net,
+						     vxlan->cfg.remote_ifindex);
+
+	/* NOTE(review): rcu_dereference() is used here without a visible
+	 * rcu_read_lock(); if this only runs under RTNL, rtnl_dereference()
+	 * may be the appropriate accessor - confirm.
+	 */
+	sock = ipv6 ? rcu_dereference(vxlan->vn6_sock)
+		    : rcu_dereference(vxlan->vn4_sock);
+	/* The socket pointer can be unset, e.g. when the device has no
+	 * open socket or an earlier reopen failed after releasing it.
+	 * There is nothing to rebind then, and it must not be dereferenced.
+	 */
+	if (!sock)
+		return;
+
+	if (sock->sock->sk->sk_bound_dev_if != l3mdev_index) {
+		int ret = vxlan_reopen(vn, vxlan);
+
+		if (ret < 0)
+			netdev_err(vxlan->dev, "Failed to reopen socket: %d\n",
+				   ret);
+	}
+}
+
static int vxlan_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -3756,6 +3844,12 @@ static int vxlan_netdevice_event(struct notifier_block *unused,
} else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
event == NETDEV_UDP_TUNNEL_DROP_INFO) {
vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+ } else if (event == NETDEV_CHANGEUPPER) {
+ vxlan_handle_change_upper(vn, dev);
+ } else if (event == NETDEV_CHANGE) {
+ if (dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, vxlan_link_ops.kind))
+ vxlan_handle_change(vn, dev);
}
return NOTIFY_DONE;
--
^ permalink raw reply related [flat|nested] 4+ messages in thread