[net-next v3 1/3] udp: Expand UDP tunnel common APIs

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [net-next v3 1/3] udp: Expand UDP tunnel common APIs
@ 2014-08-27  3:35 Andy Zhou
  2014-08-27  3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  3:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Added create_udp_tunnel_socket(), packet receive and transmit,  and
other related common functions for UDP tunnels.

Per net open UDP tunnel ports are tracked in this common layer to
prevent sharing of a single port with more than one UDP tunnel.

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 include/net/udp_tunnel.h |   50 +++++++++++
 net/ipv4/udp_tunnel.c    |  216 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 1 deletion(-)

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cb..5ff0746 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,8 @@
 #ifndef __NET_UDP_TUNNEL_H
 #define __NET_UDP_TUNNEL_H
 
+#include <net/ip_tunnels.h>
+
 struct udp_port_cfg {
 	u8			family;
 
@@ -26,7 +28,55 @@ struct udp_port_cfg {
 				use_udp6_rx_checksums:1;
 };
 
+struct udp_tunnel_sock;
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_socket_cfg {
+	struct udp_port_cfg port;
+
+	/* Used for setting up udp_sock fields, see udp.h for details */
+	__u8  encap_type;
+	udp_tunnel_encap_rcv_t encap_rcv;
+	udp_tunnel_encap_destroy_t encap_destroy;
+};
+
+struct udp_tunnel_sock {
+	struct hlist_node hlist;
+	struct socket *sock;
+};
+
 int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 		    struct socket **sockp);
 
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+					       struct socket *sock,
+					       struct udp_tunnel_socket_cfg
+							*socket_cfg);
+
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+		struct sk_buff *skb, struct net_device *dev,
+		struct in6_addr *saddr, struct in6_addr *daddr,
+		__u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
+
+#endif
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
+
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+							 bool udp_csum)
+{
+	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+	return iptunnel_handle_offloads(skb, udp_csum, type);
+}
 #endif
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 61ec1a6..3ed66bc 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -7,6 +7,23 @@
 #include <net/udp.h>
 #include <net/udp_tunnel.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+static int udp_tunnel_net_id;
+
+struct udp_tunnel_net {
+	struct hlist_head sock_list[PORT_HASH_SIZE];
+	spinlock_t  sock_lock;   /* Protecting the sock_list */
+};
 
 int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 		    struct socket **sockp)
@@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 		return -EPFNOSUPPORT;
 	}
 
-
 	*sockp = sock;
 
 	return 0;
@@ -97,4 +113,202 @@ error:
 }
 EXPORT_SYMBOL(udp_sock_create);
 
+
+/* Socket hash table head */
+static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
+{
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+
+	return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+					       struct socket *sock,
+					       struct udp_tunnel_socket_cfg
+						      *cfg)
+{
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+	struct udp_tunnel_sock *uts;
+	struct sock *sk;
+	const __be16 port = cfg->port.local_udp_port;
+	const int ipv6 = (cfg->port.family == AF_INET6);
+	int err;
+
+	if (!sock)
+		err = udp_sock_create(net, &cfg->port, &sock);
+	else
+		err = (sock->sk->sk_protocol == IPPROTO_UDP) ?
+			0 : -EPROTONOSUPPORT;
+
+	if (err)
+		return NULL;
+
+	uts = kzalloc(size, GFP_KERNEL);
+	if (!uts)
+		return ERR_PTR(-ENOMEM);
+
+	sk = sock->sk;
+
+	/* Disable multicast loopback */
+	inet_sk(sk)->mc_loop = 0;
+
+	rcu_assign_sk_user_data(sk, uts);
+
+	udp_sk(sk)->encap_type = cfg->encap_type;
+	udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+	udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+	uts->sock = sock;
+
+	spin_lock(&utn->sock_lock);
+	hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
+	spin_unlock(&utn->sock_lock);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ipv6)
+		ipv6_stub->udpv6_encap_enable();
+	else
+#endif
+		udp_encap_enable();
+
+	return uts;
+}
+EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet)
+{
+	struct udphdr *uh;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+	uh->len = htons(skb->len);
+
+	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+			     tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+			 struct sk_buff *skb, struct net_device *dev,
+			 struct in6_addr *saddr, struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+
+	uh->len = htons(skb->len);
+	uh->check = 0;
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+			    | IPSKB_REROUTED);
+	skb_dst_set(skb, dst);
+
+	if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
+		__wsum csum = skb_checksum(skb, 0, skb->len, 0);
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
+				IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_ipv6_magic(saddr, daddr,
+				skb->len, IPPROTO_UDP, 0);
+	}
+
+	__skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+	ip6h		  = ipv6_hdr(skb);
+	ip6h->version	  = 6;
+	ip6h->priority	  = prio;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->payload_len = htons(skb->len);
+	ip6h->nexthdr     = IPPROTO_UDP;
+	ip6h->hop_limit   = ttl;
+	ip6h->daddr	  = *daddr;
+	ip6h->saddr	  = *saddr;
+
+	ip6tunnel_xmit(skb, dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+#endif
+
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
+{
+	struct udp_tunnel_sock *uts;
+
+	hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) {
+		if (inet_sk(uts->sock->sk)->inet_sport == port)
+			return uts;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
+{
+	struct sock *sk = uts->sock->sk;
+	struct net *net = sock_net(sk);
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+
+	spin_lock(&utn->sock_lock);
+	hlist_del_rcu(&uts->hlist);
+	rcu_assign_sk_user_data(uts->sock->sk, NULL);
+	kernel_sock_shutdown(uts->sock, SHUT_RDWR);
+	sk_release_kernel(sk);
+	spin_unlock(&utn->sock_lock);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+
+static int __net_init udp_tunnel_init_net(struct net *net)
+{
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+	unsigned int h;
+
+	spin_lock_init(&utn->sock_lock);
+
+	for (h = 0; h < PORT_HASH_SIZE; h++)
+		INIT_HLIST_HEAD(&utn->sock_list[h]);
+
+	return 0;
+}
+
+static struct pernet_operations udp_tunnel_net_ops = {
+	.init = udp_tunnel_init_net,
+	.exit = NULL,
+	.id = &udp_tunnel_net_id,
+	.size = sizeof(struct udp_tunnel_net),
+};
+
+static int __init udp_tunnel_init(void)
+{
+	return register_pernet_subsys(&udp_tunnel_net_ops);
+}
+late_initcall(udp_tunnel_init);
+
 MODULE_LICENSE("GPL");
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
  2014-08-27  3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
@ 2014-08-27  3:35 ` Andy Zhou
  2014-08-27  4:15   ` Tom Herbert
       [not found]   ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
  2014-08-27  3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  3:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 drivers/net/vxlan.c           |  199 ++++++++++++-----------------------------
 include/net/vxlan.h           |   16 ++--
 net/openvswitch/vport-vxlan.c |    6 +-
 3 files changed, 68 insertions(+), 153 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index beb377b..f1f1c48 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
 #include <net/protocol.h>
+#include <net/udp_tunnel.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/addrconf.h>
@@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 /* Find VXLAN socket based on network namespace and UDP port */
 static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
 {
-	struct vxlan_sock *vs;
-
-	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
-		if (inet_sk(vs->sock->sk)->inet_sport == port)
-			return vs;
-	}
-	return NULL;
+	return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);
 }
 
 static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
@@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
 static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
 {
 	struct net_device *dev;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	sa_family_t sa_family = sk->sk_family;
 	__be16 port = inet_sk(sk)->inet_sport;
@@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
 static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
 {
 	struct net_device *dev;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	sa_family_t sa_family = sk->sk_family;
 	__be16 port = inet_sk(sk)->inet_sport;
@@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
 
 void vxlan_sock_release(struct vxlan_sock *vs)
 {
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 
@@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
 	spin_lock(&vn->sock_lock);
 	hlist_del_rcu(&vs->hlist);
-	rcu_assign_sk_user_data(vs->sock->sk, NULL);
 	vxlan_notify_del_rx_port(vs);
 	spin_unlock(&vn->sock_lock);
 
@@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
 	struct vxlan_sock *vs = vxlan->vn_sock;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
@@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
 	struct vxlan_sock *vs = vxlan->vn_sock;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
@@ -1338,7 +1332,6 @@ out:
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-
 static struct sk_buff *vxlan_na_create(struct sk_buff *request,
 	struct neighbour *n, bool isrouter)
 {
@@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
-						    bool udp_csum)
-{
-	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-	return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
@@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   __be16 src_port, __be16 dst_port, __be32 vni,
 			   bool xnet)
 {
-	struct ipv6hdr *ip6h;
 	struct vxlanhdr *vxh;
-	struct udphdr *uh;
 	int min_headroom;
 	int err;
 
-	skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+	skb = udp_tunnel_handle_offloads(skb,
+					 !udp_get_no_check6_tx(
+						 vs->uts.sock->sk));
 	if (IS_ERR(skb))
 		return -EINVAL;
 
@@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = vni;
 
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-
-	uh->dest = dst_port;
-	uh->source = src_port;
-
-	uh->len = htons(skb->len);
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	skb_dst_set(skb, dst);
-
-	udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
-		      saddr, daddr, skb->len);
-
-	__skb_push(skb, sizeof(*ip6h));
-	skb_reset_network_header(skb);
-	ip6h		  = ipv6_hdr(skb);
-	ip6h->version	  = 6;
-	ip6h->priority	  = prio;
-	ip6h->flow_lbl[0] = 0;
-	ip6h->flow_lbl[1] = 0;
-	ip6h->flow_lbl[2] = 0;
-	ip6h->payload_len = htons(skb->len);
-	ip6h->nexthdr     = IPPROTO_UDP;
-	ip6h->hop_limit   = ttl;
-	ip6h->daddr	  = *daddr;
-	ip6h->saddr	  = *saddr;
-
-	ip6tunnel_xmit(skb, dev);
+	udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio,
+			     ttl, src_port, dst_port);
 	return 0;
 }
 #endif
@@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
 {
 	struct vxlanhdr *vxh;
-	struct udphdr *uh;
 	int min_headroom;
 	int err;
 
-	skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+	skb = udp_tunnel_handle_offloads(skb,
+					 !vs->uts.sock->sk->sk_no_check_tx);
 	if (IS_ERR(skb))
 		return -EINVAL;
 
@@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = vni;
 
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-
-	uh->dest = dst_port;
-	uh->source = src_port;
-
-	uh->len = htons(skb->len);
-
-	udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
-		     src, dst, skb->len);
-
-	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
-			     tos, ttl, df, xnet);
+	return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
+				   ttl, df, src_port, dst_port, xnet);
 }
 EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
 
@@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 
-		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
-				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
-				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
+		err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb,
+					  fl4.saddr, dst->sin.sin_addr.s_addr,
+					  tos, ttl, df, src_port, dst_port,
+					  !net_eq(vxlan->net,
+						  dev_net(vxlan->dev)));
 
 		if (err < 0)
 			goto rt_tx_error;
 		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		struct sock *sk = vxlan->vn_sock->sock->sk;
+		struct sock *sk = vxlan->vn_sock->uts.sock->sk;
 		struct dst_entry *ndst;
 		struct flowi6 fl6;
 		u32 flags;
@@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
 	spin_lock(&vn->sock_lock);
 	for (i = 0; i < PORT_HASH_SIZE; ++i) {
 		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
-			port = inet_sk(vs->sock->sk)->inet_sport;
-			sa_family = vs->sock->sk->sk_family;
+			port = inet_sk(vs->uts.sock->sk)->inet_sport;
+			sa_family = vs->uts.sock->sk->sk_family;
 			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
 							    port);
 		}
@@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
 static void vxlan_del_work(struct work_struct *work)
 {
 	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
-	sk_release_kernel(vs->sock->sk);
+	udp_tunnel_sock_release(&vs->uts);
 	kfree_rcu(vs, rcu);
 }
 
-static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
-					__be16 port, u32 flags)
+/* Create new listen socket if needed */
+static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
+					      vxlan_rcv_t rcv, void *data,
+					      u32 flags)
 {
-	struct socket *sock;
-	struct udp_port_cfg udp_conf;
-	int err;
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_sock *vs;
+	struct udp_tunnel_socket_cfg vxlan_ts_cfg;
+	bool ipv6 = !!(flags & VXLAN_F_IPV6);
+	unsigned int h;
 
-	memset(&udp_conf, 0, sizeof(udp_conf));
+	memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
 
 	if (ipv6) {
-		udp_conf.family = AF_INET6;
-		udp_conf.use_udp6_tx_checksums =
+		vxlan_ts_cfg.port.family = AF_INET6;
+		vxlan_ts_cfg.port.use_udp6_tx_checksums =
 		    !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-		udp_conf.use_udp6_rx_checksums =
+		vxlan_ts_cfg.port.use_udp6_rx_checksums =
 		    !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
 	} else {
-		udp_conf.family = AF_INET;
-		udp_conf.local_ip.s_addr = INADDR_ANY;
-		udp_conf.use_udp_checksums =
+		vxlan_ts_cfg.port.family = AF_INET;
+		vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
+		vxlan_ts_cfg.port.use_udp_checksums =
 		    !!(flags & VXLAN_F_UDP_CSUM);
 	}
 
-	udp_conf.local_udp_port = port;
+	vxlan_ts_cfg.port.local_udp_port = port;
+	vxlan_ts_cfg.encap_type = 1;
+	vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
+	vxlan_ts_cfg.encap_destroy = NULL;
 
-	/* Open UDP socket */
-	err = udp_sock_create(net, &udp_conf, &sock);
-	if (err < 0)
-		return ERR_PTR(err);
-
-	/* Disable multicast loopback */
-	inet_sk(sock->sk)->mc_loop = 0;
-
-	return sock;
-}
-
-/* Create new listen socket if needed */
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
-					      vxlan_rcv_t *rcv, void *data,
-					      u32 flags)
-{
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-	struct vxlan_sock *vs;
-	struct socket *sock;
-	struct sock *sk;
-	unsigned int h;
-	bool ipv6 = !!(flags & VXLAN_F_IPV6);
-
-	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
+	vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
+							 NULL,
+							 &vxlan_ts_cfg);
 	if (!vs)
 		return ERR_PTR(-ENOMEM);
 
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vs->vni_list[h]);
 
-	INIT_WORK(&vs->del_work, vxlan_del_work);
+	spin_lock(&vn->sock_lock);
+	list_add(&vs->next, &vn->vxlan_list);
+	spin_unlock(&vn->sock_lock);
 
-	sock = vxlan_create_sock(net, ipv6, port, flags);
-	if (IS_ERR(sock)) {
-		kfree(vs);
-		return ERR_CAST(sock);
-	}
+	INIT_WORK(&vs->del_work, vxlan_del_work);
 
-	vs->sock = sock;
-	sk = sock->sk;
 	atomic_set(&vs->refcnt, 1);
+
 	vs->rcv = rcv;
-	vs->data = data;
-	rcu_assign_sk_user_data(vs->sock->sk, vs);
+	vs->rcv_data = data;
 
 	/* Initialize the vxlan udp offloads structure */
 	vs->udp_offloads.port = port;
@@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	vxlan_notify_add_rx_port(vs);
 	spin_unlock(&vn->sock_lock);
 
-	/* Mark socket as an encapsulation socket. */
-	udp_sk(sk)->encap_type = 1;
-	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6)
-		ipv6_stub->udpv6_encap_enable();
-	else
-#endif
-		udp_encap_enable();
-
 	return vs;
 }
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
-				  vxlan_rcv_t *rcv, void *data,
+				  vxlan_rcv_t rcv, void *data,
 				  bool no_share, u32 flags)
 {
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 
 	vs = vxlan_socket_create(net, port, rcv, data, flags);
@@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 	if (no_share)	/* Return error if sharing is not allowed. */
 		return vs;
 
-	spin_lock(&vn->sock_lock);
 	vs = vxlan_find_sock(net, port);
 	if (vs) {
 		if (vs->rcv == rcv)
@@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 		else
 			vs = ERR_PTR(-EBUSY);
 	}
-	spin_unlock(&vn->sock_lock);
 
 	if (!vs)
 		vs = ERR_PTR(-EINVAL);
@@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	}
 
 	list_add(&vxlan->next, &vn->vxlan_list);
-
 	return 0;
 }
 
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index d5f59f3..10bfc13 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -4,23 +4,27 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/udp_tunnel.h>
 
 #define VNI_HASH_BITS	10
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
 
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
 
-/* per UDP socket information */
+typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+			    __be32 key);
+
+/* per vxlan socket information */
 struct vxlan_sock {
+	struct udp_tunnel_sock uts;  /* Must be the first member */
 	struct hlist_node hlist;
-	vxlan_rcv_t	 *rcv;
-	void		 *data;
+	struct list_head next;
 	struct work_struct del_work;
-	struct socket	 *sock;
 	struct rcu_head	  rcu;
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 	atomic_t	  refcnt;
+	vxlan_rcv_t	  rcv;
+	void		  *rcv_data;
 	struct udp_offload udp_offloads;
 };
 
@@ -35,7 +39,7 @@ struct vxlan_sock {
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
-				  vxlan_rcv_t *rcv, void *data,
+				  vxlan_rcv_t rcv, void *data,
 				  bool no_share, u32 flags);
 
 void vxlan_sock_release(struct vxlan_sock *vs);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d8b7e24..7599efd 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 {
 	struct ovs_key_ipv4_tunnel tun_key;
-	struct vport *vport = vs->data;
+	struct vport *vport = vs->rcv_data;
 	struct iphdr *iph;
 	__be64 key;
 
@@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
 {
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
-	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
 
 	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 		return -EMSGSIZE;
@@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
-	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
 	struct rtable *rt;
 	struct flowi4 fl;
 	__be16 src_port;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
  2014-08-27  3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
@ 2014-08-27  4:15   ` Tom Herbert
       [not found]   ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
  1 sibling, 0 replies; 10+ messages in thread
From: Tom Herbert @ 2014-08-27  4:15 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, Linux Netdev List

On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
>  drivers/net/vxlan.c           |  199 ++++++++++++-----------------------------
>  include/net/vxlan.h           |   16 ++--
>  net/openvswitch/vport-vxlan.c |    6 +-
>  3 files changed, 68 insertions(+), 153 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index beb377b..f1f1c48 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -42,6 +42,7 @@
>  #include <net/netns/generic.h>
>  #include <net/vxlan.h>
>  #include <net/protocol.h>
> +#include <net/udp_tunnel.h>
>  #if IS_ENABLED(CONFIG_IPV6)
>  #include <net/ipv6.h>
>  #include <net/addrconf.h>
> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
>  /* Find VXLAN socket based on network namespace and UDP port */
>  static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
>  {
> -       struct vxlan_sock *vs;
> -
> -       hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> -               if (inet_sk(vs->sock->sk)->inet_sport == port)
> -                       return vs;
> -       }
> -       return NULL;
> +       return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);

It seems incorrect to assume that the socket returned is vxlan,
presumably this could be some other type of tunnel socket.

>  }
>
>  static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
> @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
>  static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
>  {
>         struct net_device *dev;
> -       struct sock *sk = vs->sock->sk;
> +       struct sock *sk = vs->uts.sock->sk;
>         struct net *net = sock_net(sk);
>         sa_family_t sa_family = sk->sk_family;
>         __be16 port = inet_sk(sk)->inet_sport;
> @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
>  static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
>  {
>         struct net_device *dev;
> -       struct sock *sk = vs->sock->sk;
> +       struct sock *sk = vs->uts.sock->sk;
>         struct net *net = sock_net(sk);
>         sa_family_t sa_family = sk->sk_family;
>         __be16 port = inet_sk(sk)->inet_sport;
> @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
>
>  void vxlan_sock_release(struct vxlan_sock *vs)
>  {
> -       struct sock *sk = vs->sock->sk;
> +       struct sock *sk = vs->uts.sock->sk;
>         struct net *net = sock_net(sk);
>         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>
> @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
>
>         spin_lock(&vn->sock_lock);
>         hlist_del_rcu(&vs->hlist);
> -       rcu_assign_sk_user_data(vs->sock->sk, NULL);
>         vxlan_notify_del_rx_port(vs);
>         spin_unlock(&vn->sock_lock);
>
> @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work)
>  {
>         struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
>         struct vxlan_sock *vs = vxlan->vn_sock;
> -       struct sock *sk = vs->sock->sk;
> +       struct sock *sk = vs->uts.sock->sk;
>         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>         int ifindex = vxlan->default_dst.remote_ifindex;
>
> @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
>  {
>         struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
>         struct vxlan_sock *vs = vxlan->vn_sock;
> -       struct sock *sk = vs->sock->sk;
> +       struct sock *sk = vs->uts.sock->sk;
>         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>         int ifindex = vxlan->default_dst.remote_ifindex;
>
> @@ -1338,7 +1332,6 @@ out:
>  }
>
>  #if IS_ENABLED(CONFIG_IPV6)
> -
>  static struct sk_buff *vxlan_na_create(struct sk_buff *request,
>         struct neighbour *n, bool isrouter)
>  {
> @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
>         return false;
>  }
>
> -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
> -                                                   bool udp_csum)
> -{
> -       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> -       return iptunnel_handle_offloads(skb, udp_csum, type);
> -}
> -
>  #if IS_ENABLED(CONFIG_IPV6)
>  static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>                            struct dst_entry *dst, struct sk_buff *skb,
> @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>                            __be16 src_port, __be16 dst_port, __be32 vni,
>                            bool xnet)
>  {
> -       struct ipv6hdr *ip6h;
>         struct vxlanhdr *vxh;
> -       struct udphdr *uh;
>         int min_headroom;
>         int err;
>
> -       skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
> +       skb = udp_tunnel_handle_offloads(skb,
> +                                        !udp_get_no_check6_tx(
> +                                                vs->uts.sock->sk));
>         if (IS_ERR(skb))
>                 return -EINVAL;
>
> @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>         vxh->vx_flags = htonl(VXLAN_FLAGS);
>         vxh->vx_vni = vni;
>
> -       __skb_push(skb, sizeof(*uh));
> -       skb_reset_transport_header(skb);
> -       uh = udp_hdr(skb);
> -
> -       uh->dest = dst_port;
> -       uh->source = src_port;
> -
> -       uh->len = htons(skb->len);
> -
> -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> -       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
> -                             IPSKB_REROUTED);
> -       skb_dst_set(skb, dst);
> -
> -       udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
> -                     saddr, daddr, skb->len);
> -
> -       __skb_push(skb, sizeof(*ip6h));
> -       skb_reset_network_header(skb);
> -       ip6h              = ipv6_hdr(skb);
> -       ip6h->version     = 6;
> -       ip6h->priority    = prio;
> -       ip6h->flow_lbl[0] = 0;
> -       ip6h->flow_lbl[1] = 0;
> -       ip6h->flow_lbl[2] = 0;
> -       ip6h->payload_len = htons(skb->len);
> -       ip6h->nexthdr     = IPPROTO_UDP;
> -       ip6h->hop_limit   = ttl;
> -       ip6h->daddr       = *daddr;
> -       ip6h->saddr       = *saddr;
> -
> -       ip6tunnel_xmit(skb, dev);
> +       udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio,
> +                            ttl, src_port, dst_port);
>         return 0;
>  }
>  #endif
> @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>                    __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
>  {
>         struct vxlanhdr *vxh;
> -       struct udphdr *uh;
>         int min_headroom;
>         int err;
>
> -       skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
> +       skb = udp_tunnel_handle_offloads(skb,
> +                                        !vs->uts.sock->sk->sk_no_check_tx);
>         if (IS_ERR(skb))
>                 return -EINVAL;
>
> @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>         vxh->vx_flags = htonl(VXLAN_FLAGS);
>         vxh->vx_vni = vni;
>
> -       __skb_push(skb, sizeof(*uh));
> -       skb_reset_transport_header(skb);
> -       uh = udp_hdr(skb);
> -
> -       uh->dest = dst_port;
> -       uh->source = src_port;
> -
> -       uh->len = htons(skb->len);
> -
> -       udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
> -                    src, dst, skb->len);
> -
> -       return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> -                            tos, ttl, df, xnet);
> +       return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
> +                                  ttl, df, src_port, dst_port, xnet);
>  }
>  EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
>
> @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>                 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
>                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>
> -               err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
> -                                    fl4.saddr, dst->sin.sin_addr.s_addr,
> -                                    tos, ttl, df, src_port, dst_port,
> -                                    htonl(vni << 8),
> -                                    !net_eq(vxlan->net, dev_net(vxlan->dev)));
> +               err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb,
> +                                         fl4.saddr, dst->sin.sin_addr.s_addr,
> +                                         tos, ttl, df, src_port, dst_port,
> +                                         !net_eq(vxlan->net,
> +                                                 dev_net(vxlan->dev)));
>
>                 if (err < 0)
>                         goto rt_tx_error;
>                 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
>  #if IS_ENABLED(CONFIG_IPV6)
>         } else {
> -               struct sock *sk = vxlan->vn_sock->sock->sk;
> +               struct sock *sk = vxlan->vn_sock->uts.sock->sk;
>                 struct dst_entry *ndst;
>                 struct flowi6 fl6;
>                 u32 flags;
> @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
>         spin_lock(&vn->sock_lock);
>         for (i = 0; i < PORT_HASH_SIZE; ++i) {
>                 hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
> -                       port = inet_sk(vs->sock->sk)->inet_sport;
> -                       sa_family = vs->sock->sk->sk_family;
> +                       port = inet_sk(vs->uts.sock->sk)->inet_sport;
> +                       sa_family = vs->uts.sock->sk->sk_family;
>                         dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
>                                                             port);
>                 }
> @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
>  static void vxlan_del_work(struct work_struct *work)
>  {
>         struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
> -
> -       sk_release_kernel(vs->sock->sk);
> +       udp_tunnel_sock_release(&vs->uts);
>         kfree_rcu(vs, rcu);
>  }
>
> -static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
> -                                       __be16 port, u32 flags)
> +/* Create new listen socket if needed */
> +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
> +                                             vxlan_rcv_t rcv, void *data,
> +                                             u32 flags)
>  {
> -       struct socket *sock;
> -       struct udp_port_cfg udp_conf;
> -       int err;
> +       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> +       struct vxlan_sock *vs;
> +       struct udp_tunnel_socket_cfg vxlan_ts_cfg;
> +       bool ipv6 = !!(flags & VXLAN_F_IPV6);
> +       unsigned int h;
>
> -       memset(&udp_conf, 0, sizeof(udp_conf));
> +       memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
>
>         if (ipv6) {
> -               udp_conf.family = AF_INET6;
> -               udp_conf.use_udp6_tx_checksums =
> +               vxlan_ts_cfg.port.family = AF_INET6;
> +               vxlan_ts_cfg.port.use_udp6_tx_checksums =
>                     !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
> -               udp_conf.use_udp6_rx_checksums =
> +               vxlan_ts_cfg.port.use_udp6_rx_checksums =
>                     !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
>         } else {
> -               udp_conf.family = AF_INET;
> -               udp_conf.local_ip.s_addr = INADDR_ANY;
> -               udp_conf.use_udp_checksums =
> +               vxlan_ts_cfg.port.family = AF_INET;
> +               vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
> +               vxlan_ts_cfg.port.use_udp_checksums =
>                     !!(flags & VXLAN_F_UDP_CSUM);
>         }
>
> -       udp_conf.local_udp_port = port;
> +       vxlan_ts_cfg.port.local_udp_port = port;
> +       vxlan_ts_cfg.encap_type = 1;
> +       vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
> +       vxlan_ts_cfg.encap_destroy = NULL;
>
> -       /* Open UDP socket */
> -       err = udp_sock_create(net, &udp_conf, &sock);
> -       if (err < 0)
> -               return ERR_PTR(err);
> -
> -       /* Disable multicast loopback */
> -       inet_sk(sock->sk)->mc_loop = 0;
> -
> -       return sock;
> -}
> -
> -/* Create new listen socket if needed */
> -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
> -                                             vxlan_rcv_t *rcv, void *data,
> -                                             u32 flags)
> -{
> -       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> -       struct vxlan_sock *vs;
> -       struct socket *sock;
> -       struct sock *sk;
> -       unsigned int h;
> -       bool ipv6 = !!(flags & VXLAN_F_IPV6);
> -
> -       vs = kzalloc(sizeof(*vs), GFP_KERNEL);
> +       vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
> +                                                        NULL,
> +                                                        &vxlan_ts_cfg);
>         if (!vs)
>                 return ERR_PTR(-ENOMEM);
>
>         for (h = 0; h < VNI_HASH_SIZE; ++h)
>                 INIT_HLIST_HEAD(&vs->vni_list[h]);
>
> -       INIT_WORK(&vs->del_work, vxlan_del_work);
> +       spin_lock(&vn->sock_lock);
> +       list_add(&vs->next, &vn->vxlan_list);
> +       spin_unlock(&vn->sock_lock);
>
> -       sock = vxlan_create_sock(net, ipv6, port, flags);
> -       if (IS_ERR(sock)) {
> -               kfree(vs);
> -               return ERR_CAST(sock);
> -       }
> +       INIT_WORK(&vs->del_work, vxlan_del_work);
>
> -       vs->sock = sock;
> -       sk = sock->sk;
>         atomic_set(&vs->refcnt, 1);
> +
>         vs->rcv = rcv;
> -       vs->data = data;
> -       rcu_assign_sk_user_data(vs->sock->sk, vs);
> +       vs->rcv_data = data;
>
>         /* Initialize the vxlan udp offloads structure */
>         vs->udp_offloads.port = port;
> @@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
>         vxlan_notify_add_rx_port(vs);
>         spin_unlock(&vn->sock_lock);
>
> -       /* Mark socket as an encapsulation socket. */
> -       udp_sk(sk)->encap_type = 1;
> -       udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
> -#if IS_ENABLED(CONFIG_IPV6)
> -       if (ipv6)
> -               ipv6_stub->udpv6_encap_enable();
> -       else
> -#endif
> -               udp_encap_enable();
> -
>         return vs;
>  }
>
>  struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> -                                 vxlan_rcv_t *rcv, void *data,
> +                                 vxlan_rcv_t rcv, void *data,
>                                   bool no_share, u32 flags)
>  {
> -       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>         struct vxlan_sock *vs;
>
>         vs = vxlan_socket_create(net, port, rcv, data, flags);
> @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>         if (no_share)   /* Return error if sharing is not allowed. */
>                 return vs;
>
> -       spin_lock(&vn->sock_lock);
>         vs = vxlan_find_sock(net, port);
>         if (vs) {
>                 if (vs->rcv == rcv)
> @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>                 else
>                         vs = ERR_PTR(-EBUSY);
>         }
> -       spin_unlock(&vn->sock_lock);
>
>         if (!vs)
>                 vs = ERR_PTR(-EINVAL);
> @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
>         }
>
>         list_add(&vxlan->next, &vn->vxlan_list);
> -
>         return 0;
>  }
>
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index d5f59f3..10bfc13 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -4,23 +4,27 @@
>  #include <linux/skbuff.h>
>  #include <linux/netdevice.h>
>  #include <linux/udp.h>
> +#include <net/udp_tunnel.h>
>
>  #define VNI_HASH_BITS  10
>  #define VNI_HASH_SIZE  (1<<VNI_HASH_BITS)
>
>  struct vxlan_sock;
> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
>
> -/* per UDP socket information */
> +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
> +                           __be32 key);
> +
> +/* per vxlan socket information */
>  struct vxlan_sock {
> +       struct udp_tunnel_sock uts;  /* Must be the first member */
>         struct hlist_node hlist;
> -       vxlan_rcv_t      *rcv;
> -       void             *data;
> +       struct list_head next;
>         struct work_struct del_work;
> -       struct socket    *sock;
>         struct rcu_head   rcu;
>         struct hlist_head vni_list[VNI_HASH_SIZE];
>         atomic_t          refcnt;
> +       vxlan_rcv_t       rcv;
> +       void              *rcv_data;
>         struct udp_offload udp_offloads;
>  };
>
> @@ -35,7 +39,7 @@ struct vxlan_sock {
>  #define VXLAN_F_UDP_ZERO_CSUM6_RX      0x100
>
>  struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> -                                 vxlan_rcv_t *rcv, void *data,
> +                                 vxlan_rcv_t rcv, void *data,
>                                   bool no_share, u32 flags);
>
>  void vxlan_sock_release(struct vxlan_sock *vs);
> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
> index d8b7e24..7599efd 100644
> --- a/net/openvswitch/vport-vxlan.c
> +++ b/net/openvswitch/vport-vxlan.c
> @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
>  static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
>  {
>         struct ovs_key_ipv4_tunnel tun_key;
> -       struct vport *vport = vs->data;
> +       struct vport *vport = vs->rcv_data;
>         struct iphdr *iph;
>         __be64 key;
>
> @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
>  static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
>  {
>         struct vxlan_port *vxlan_port = vxlan_vport(vport);
> -       __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
> +       __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>
>         if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
>                 return -EMSGSIZE;
> @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
>  {
>         struct net *net = ovs_dp_get_net(vport->dp);
>         struct vxlan_port *vxlan_port = vxlan_vport(vport);
> -       __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
> +       __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>         struct rtable *rt;
>         struct flowi4 fl;
>         __be16 src_port;
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

[parent not found: <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>]

* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
       [not found]   ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
@ 2014-08-27  5:19     ` Andy Zhou
  0 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  5:19 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List

On Tue, Aug 26, 2014 at 9:14 PM, Tom Herbert <therbert@google.com> wrote:
>
>
> On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>>  drivers/net/vxlan.c           |  199
>> ++++++++++++-----------------------------
>>  include/net/vxlan.h           |   16 ++--
>>  net/openvswitch/vport-vxlan.c |    6 +-
>>  3 files changed, 68 insertions(+), 153 deletions(-)
>>
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index beb377b..f1f1c48 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -42,6 +42,7 @@
>>  #include <net/netns/generic.h>
>>  #include <net/vxlan.h>
>>  #include <net/protocol.h>
>> +#include <net/udp_tunnel.h>
>>  #if IS_ENABLED(CONFIG_IPV6)
>>  #include <net/ipv6.h>
>>  #include <net/addrconf.h>
>> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst
>> *first_remote_rtnl(struct vxlan_fdb *fdb)
>>  /* Find VXLAN socket based on network namespace and UDP port */
>>  static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
>>  {
>> -       struct vxlan_sock *vs;
>> -
>> -       hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>> -               if (inet_sk(vs->sock->sk)->inet_sport == port)
>> -                       return vs;
>> -       }
>> -       return NULL;
>> +       return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);
>
> It seems incorrect to assume that the socket returned is vxlan, presumably
> this could be some other type of tunnel socket.
>
If I drop udp_tunnel_find_sock API from last patch, then this should
not be an issue any more right?
vxlan driver will just keep track of its own open sock.
>>  }
>>
>>  static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
>> @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int
>> nhoff)
>>  static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
>>  {
>>         struct net_device *dev;
>> -       struct sock *sk = vs->sock->sk;
>> +       struct sock *sk = vs->uts.sock->sk;
>>         struct net *net = sock_net(sk);
>>         sa_family_t sa_family = sk->sk_family;
>>         __be16 port = inet_sk(sk)->inet_sport;
>> @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock
>> *vs)
>>  static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
>>  {
>>         struct net_device *dev;
>> -       struct sock *sk = vs->sock->sk;
>> +       struct sock *sk = vs->uts.sock->sk;
>>         struct net *net = sock_net(sk);
>>         sa_family_t sa_family = sk->sk_family;
>>         __be16 port = inet_sk(sk)->inet_sport;
>> @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
>>
>>  void vxlan_sock_release(struct vxlan_sock *vs)
>>  {
>> -       struct sock *sk = vs->sock->sk;
>> +       struct sock *sk = vs->uts.sock->sk;
>>         struct net *net = sock_net(sk);
>>         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>>
>> @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
>>
>>         spin_lock(&vn->sock_lock);
>>         hlist_del_rcu(&vs->hlist);
>> -       rcu_assign_sk_user_data(vs->sock->sk, NULL);
>>         vxlan_notify_del_rx_port(vs);
>>         spin_unlock(&vn->sock_lock);
>>
>> @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct
>> *work)
>>  {
>>         struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev,
>> igmp_join);
>>         struct vxlan_sock *vs = vxlan->vn_sock;
>> -       struct sock *sk = vs->sock->sk;
>> +       struct sock *sk = vs->uts.sock->sk;
>>         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>>         int ifindex = vxlan->default_dst.remote_ifindex;
>>
>> @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct
>> *work)
>>  {
>>         struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev,
>> igmp_leave);
>>         struct vxlan_sock *vs = vxlan->vn_sock;
>> -       struct sock *sk = vs->sock->sk;
>> +       struct sock *sk = vs->uts.sock->sk;
>>         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>>         int ifindex = vxlan->default_dst.remote_ifindex;
>>
>> @@ -1338,7 +1332,6 @@ out:
>>  }
>>
>>  #if IS_ENABLED(CONFIG_IPV6)
>> -
>>  static struct sk_buff *vxlan_na_create(struct sk_buff *request,
>>         struct neighbour *n, bool isrouter)
>>  {
>> @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device
>> *dev, struct sk_buff *skb)
>>         return false;
>>  }
>>
>> -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
>> -                                                   bool udp_csum)
>> -{
>> -       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM :
>> SKB_GSO_UDP_TUNNEL;
>> -       return iptunnel_handle_offloads(skb, udp_csum, type);
>> -}
>> -
>>  #if IS_ENABLED(CONFIG_IPV6)
>>  static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>>                            struct dst_entry *dst, struct sk_buff *skb,
>> @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>>                            __be16 src_port, __be16 dst_port, __be32 vni,
>>                            bool xnet)
>>  {
>> -       struct ipv6hdr *ip6h;
>>         struct vxlanhdr *vxh;
>> -       struct udphdr *uh;
>>         int min_headroom;
>>         int err;
>>
>> -       skb = vxlan_handle_offloads(skb,
>> !udp_get_no_check6_tx(vs->sock->sk));
>> +       skb = udp_tunnel_handle_offloads(skb,
>> +                                        !udp_get_no_check6_tx(
>> +                                                vs->uts.sock->sk));
>>         if (IS_ERR(skb))
>>                 return -EINVAL;
>>
>> @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>>         vxh->vx_flags = htonl(VXLAN_FLAGS);
>>         vxh->vx_vni = vni;
>>
>> -       __skb_push(skb, sizeof(*uh));
>> -       skb_reset_transport_header(skb);
>> -       uh = udp_hdr(skb);
>> -
>> -       uh->dest = dst_port;
>> -       uh->source = src_port;
>> -
>> -       uh->len = htons(skb->len);
>> -
>> -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>> -       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE |
>> IPSKB_XFRM_TRANSFORMED |
>> -                             IPSKB_REROUTED);
>> -       skb_dst_set(skb, dst);
>> -
>> -       udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
>> -                     saddr, daddr, skb->len);
>> -
>> -       __skb_push(skb, sizeof(*ip6h));
>> -       skb_reset_network_header(skb);
>> -       ip6h              = ipv6_hdr(skb);
>> -       ip6h->version     = 6;
>> -       ip6h->priority    = prio;
>> -       ip6h->flow_lbl[0] = 0;
>> -       ip6h->flow_lbl[1] = 0;
>> -       ip6h->flow_lbl[2] = 0;
>> -       ip6h->payload_len = htons(skb->len);
>> -       ip6h->nexthdr     = IPPROTO_UDP;
>> -       ip6h->hop_limit   = ttl;
>> -       ip6h->daddr       = *daddr;
>> -       ip6h->saddr       = *saddr;
>> -
>> -       ip6tunnel_xmit(skb, dev);
>> +       udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr,
>> prio,
>> +                            ttl, src_port, dst_port);
>>         return 0;
>>  }
>>  #endif
>> @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>>                    __be16 src_port, __be16 dst_port, __be32 vni, bool
>> xnet)
>>  {
>>         struct vxlanhdr *vxh;
>> -       struct udphdr *uh;
>>         int min_headroom;
>>         int err;
>>
>> -       skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
>> +       skb = udp_tunnel_handle_offloads(skb,
>> +
>> !vs->uts.sock->sk->sk_no_check_tx);
>>         if (IS_ERR(skb))
>>                 return -EINVAL;
>>
>> @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>>         vxh->vx_flags = htonl(VXLAN_FLAGS);
>>         vxh->vx_vni = vni;
>>
>> -       __skb_push(skb, sizeof(*uh));
>> -       skb_reset_transport_header(skb);
>> -       uh = udp_hdr(skb);
>> -
>> -       uh->dest = dst_port;
>> -       uh->source = src_port;
>> -
>> -       uh->len = htons(skb->len);
>> -
>> -       udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
>> -                    src, dst, skb->len);
>> -
>> -       return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>> -                            tos, ttl, df, xnet);
>> +       return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
>> +                                  ttl, df, src_port, dst_port, xnet);
>>  }
>>  EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
>>
>> @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb,
>> struct net_device *dev,
>>                 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
>>                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>>
>> -               err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
>> -                                    fl4.saddr, dst->sin.sin_addr.s_addr,
>> -                                    tos, ttl, df, src_port, dst_port,
>> -                                    htonl(vni << 8),
>> -                                    !net_eq(vxlan->net,
>> dev_net(vxlan->dev)));
>> +               err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt,
>> skb,
>> +                                         fl4.saddr,
>> dst->sin.sin_addr.s_addr,
>> +                                         tos, ttl, df, src_port,
>> dst_port,
>> +                                         !net_eq(vxlan->net,
>> +                                                 dev_net(vxlan->dev)));
>>
>>                 if (err < 0)
>>                         goto rt_tx_error;
>>                 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
>>  #if IS_ENABLED(CONFIG_IPV6)
>>         } else {
>> -               struct sock *sk = vxlan->vn_sock->sock->sk;
>> +               struct sock *sk = vxlan->vn_sock->uts.sock->sk;
>>                 struct dst_entry *ndst;
>>                 struct flowi6 fl6;
>>                 u32 flags;
>> @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
>>         spin_lock(&vn->sock_lock);
>>         for (i = 0; i < PORT_HASH_SIZE; ++i) {
>>                 hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
>> -                       port = inet_sk(vs->sock->sk)->inet_sport;
>> -                       sa_family = vs->sock->sk->sk_family;
>> +                       port = inet_sk(vs->uts.sock->sk)->inet_sport;
>> +                       sa_family = vs->uts.sock->sk->sk_family;
>>                         dev->netdev_ops->ndo_add_vxlan_port(dev,
>> sa_family,
>>                                                             port);
>>                 }
>> @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops
>> = {
>>  static void vxlan_del_work(struct work_struct *work)
>>  {
>>         struct vxlan_sock *vs = container_of(work, struct vxlan_sock,
>> del_work);
>> -
>> -       sk_release_kernel(vs->sock->sk);
>> +       udp_tunnel_sock_release(&vs->uts);
>>         kfree_rcu(vs, rcu);
>>  }
>>
>> -static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>> -                                       __be16 port, u32 flags)
>> +/* Create new listen socket if needed */
>> +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16
>> port,
>> +                                             vxlan_rcv_t rcv, void *data,
>> +                                             u32 flags)
>>  {
>> -       struct socket *sock;
>> -       struct udp_port_cfg udp_conf;
>> -       int err;
>> +       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>> +       struct vxlan_sock *vs;
>> +       struct udp_tunnel_socket_cfg vxlan_ts_cfg;
>> +       bool ipv6 = !!(flags & VXLAN_F_IPV6);
>> +       unsigned int h;
>>
>> -       memset(&udp_conf, 0, sizeof(udp_conf));
>> +       memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
>>
>>         if (ipv6) {
>> -               udp_conf.family = AF_INET6;
>> -               udp_conf.use_udp6_tx_checksums =
>> +               vxlan_ts_cfg.port.family = AF_INET6;
>> +               vxlan_ts_cfg.port.use_udp6_tx_checksums =
>>                     !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
>> -               udp_conf.use_udp6_rx_checksums =
>> +               vxlan_ts_cfg.port.use_udp6_rx_checksums =
>>                     !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
>>         } else {
>> -               udp_conf.family = AF_INET;
>> -               udp_conf.local_ip.s_addr = INADDR_ANY;
>> -               udp_conf.use_udp_checksums =
>> +               vxlan_ts_cfg.port.family = AF_INET;
>> +               vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
>> +               vxlan_ts_cfg.port.use_udp_checksums =
>>                     !!(flags & VXLAN_F_UDP_CSUM);
>>         }
>>
>> -       udp_conf.local_udp_port = port;
>> +       vxlan_ts_cfg.port.local_udp_port = port;
>> +       vxlan_ts_cfg.encap_type = 1;
>> +       vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
>> +       vxlan_ts_cfg.encap_destroy = NULL;
>>
>> -       /* Open UDP socket */
>> -       err = udp_sock_create(net, &udp_conf, &sock);
>> -       if (err < 0)
>> -               return ERR_PTR(err);
>> -
>> -       /* Disable multicast loopback */
>> -       inet_sk(sock->sk)->mc_loop = 0;
>> -
>> -       return sock;
>> -}
>> -
>> -/* Create new listen socket if needed */
>> -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16
>> port,
>> -                                             vxlan_rcv_t *rcv, void
>> *data,
>> -                                             u32 flags)
>> -{
>> -       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>> -       struct vxlan_sock *vs;
>> -       struct socket *sock;
>> -       struct sock *sk;
>> -       unsigned int h;
>> -       bool ipv6 = !!(flags & VXLAN_F_IPV6);
>> -
>> -       vs = kzalloc(sizeof(*vs), GFP_KERNEL);
>> +       vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
>> +                                                        NULL,
>> +                                                        &vxlan_ts_cfg);
>>         if (!vs)
>>                 return ERR_PTR(-ENOMEM);
>>
>>         for (h = 0; h < VNI_HASH_SIZE; ++h)
>>                 INIT_HLIST_HEAD(&vs->vni_list[h]);
>>
>> -       INIT_WORK(&vs->del_work, vxlan_del_work);
>> +       spin_lock(&vn->sock_lock);
>> +       list_add(&vs->next, &vn->vxlan_list);
>> +       spin_unlock(&vn->sock_lock);
>>
>> -       sock = vxlan_create_sock(net, ipv6, port, flags);
>> -       if (IS_ERR(sock)) {
>> -               kfree(vs);
>> -               return ERR_CAST(sock);
>> -       }
>> +       INIT_WORK(&vs->del_work, vxlan_del_work);
>>
>> -       vs->sock = sock;
>> -       sk = sock->sk;
>>         atomic_set(&vs->refcnt, 1);
>> +
>>         vs->rcv = rcv;
>> -       vs->data = data;
>> -       rcu_assign_sk_user_data(vs->sock->sk, vs);
>> +       vs->rcv_data = data;
>>
>>         /* Initialize the vxlan udp offloads structure */
>>         vs->udp_offloads.port = port;
>> @@ -2419,24 +2344,13 @@ static struct vxlan_sock
>> *vxlan_socket_create(struct net *net, __be16 port,
>>         vxlan_notify_add_rx_port(vs);
>>         spin_unlock(&vn->sock_lock);
>>
>> -       /* Mark socket as an encapsulation socket. */
>> -       udp_sk(sk)->encap_type = 1;
>> -       udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
>> -#if IS_ENABLED(CONFIG_IPV6)
>> -       if (ipv6)
>> -               ipv6_stub->udpv6_encap_enable();
>> -       else
>> -#endif
>> -               udp_encap_enable();
>> -
>>         return vs;
>>  }
>>
>>  struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>> -                                 vxlan_rcv_t *rcv, void *data,
>> +                                 vxlan_rcv_t rcv, void *data,
>>                                   bool no_share, u32 flags)
>>  {
>> -       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>>         struct vxlan_sock *vs;
>>
>>         vs = vxlan_socket_create(net, port, rcv, data, flags);
>> @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net,
>> __be16 port,
>>         if (no_share)   /* Return error if sharing is not allowed. */
>>                 return vs;
>>
>> -       spin_lock(&vn->sock_lock);
>>         vs = vxlan_find_sock(net, port);
>>         if (vs) {
>>                 if (vs->rcv == rcv)
>> @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net,
>> __be16 port,
>>                 else
>>                         vs = ERR_PTR(-EBUSY);
>>         }
>> -       spin_unlock(&vn->sock_lock);
>>
>>         if (!vs)
>>                 vs = ERR_PTR(-EINVAL);
>> @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct
>> net_device *dev,
>>         }
>>
>>         list_add(&vxlan->next, &vn->vxlan_list);
>> -
>>         return 0;
>>  }
>>
>> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
>> index d5f59f3..10bfc13 100644
>> --- a/include/net/vxlan.h
>> +++ b/include/net/vxlan.h
>> @@ -4,23 +4,27 @@
>>  #include <linux/skbuff.h>
>>  #include <linux/netdevice.h>
>>  #include <linux/udp.h>
>> +#include <net/udp_tunnel.h>
>>
>>  #define VNI_HASH_BITS  10
>>  #define VNI_HASH_SIZE  (1<<VNI_HASH_BITS)
>>
>>  struct vxlan_sock;
>> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
>> __be32 key);
>>
>> -/* per UDP socket information */
>> +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
>> +                           __be32 key);
>> +
>> +/* per vxlan socket information */
>>  struct vxlan_sock {
>> +       struct udp_tunnel_sock uts;  /* Must be the first member */
>>         struct hlist_node hlist;
>> -       vxlan_rcv_t      *rcv;
>> -       void             *data;
>> +       struct list_head next;
>>         struct work_struct del_work;
>> -       struct socket    *sock;
>>         struct rcu_head   rcu;
>>         struct hlist_head vni_list[VNI_HASH_SIZE];
>>         atomic_t          refcnt;
>> +       vxlan_rcv_t       rcv;
>> +       void              *rcv_data;
>>         struct udp_offload udp_offloads;
>>  };
>>
>> @@ -35,7 +39,7 @@ struct vxlan_sock {
>>  #define VXLAN_F_UDP_ZERO_CSUM6_RX      0x100
>>
>>  struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>> -                                 vxlan_rcv_t *rcv, void *data,
>> +                                 vxlan_rcv_t rcv, void *data,
>>                                   bool no_share, u32 flags);
>>
>>  void vxlan_sock_release(struct vxlan_sock *vs);
>> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
>> index d8b7e24..7599efd 100644
>> --- a/net/openvswitch/vport-vxlan.c
>> +++ b/net/openvswitch/vport-vxlan.c
>> @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const
>> struct vport *vport)
>>  static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32
>> vx_vni)
>>  {
>>         struct ovs_key_ipv4_tunnel tun_key;
>> -       struct vport *vport = vs->data;
>> +       struct vport *vport = vs->rcv_data;
>>         struct iphdr *iph;
>>         __be64 key;
>>
>> @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct
>> sk_buff *skb, __be32 vx_vni)
>>  static int vxlan_get_options(const struct vport *vport, struct sk_buff
>> *skb)
>>  {
>>         struct vxlan_port *vxlan_port = vxlan_vport(vport);
>> -       __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
>> +       __be16 dst_port =
>> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>>
>>         if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
>>                 return -EMSGSIZE;
>> @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct
>> sk_buff *skb)
>>  {
>>         struct net *net = ovs_dp_get_net(vport->dp);
>>         struct vxlan_port *vxlan_port = vxlan_vport(vport);
>> -       __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
>> +       __be16 dst_port =
>> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>>         struct rtable *rt;
>>         struct flowi4 fl;
>>         __be16 src_port;
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [net-next v3 3/3] l2tp: Refactor l2tp core driver to make use of the common UDP tunnel functions
  2014-08-27  3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
  2014-08-27  3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
@ 2014-08-27  3:35 ` Andy Zhou
  2014-08-27  3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
  2014-08-27  4:12 ` Tom Herbert
  3 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  3:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 net/l2tp/l2tp_core.c |  189 +++++++++++++++++++++++++++++---------------------
 1 file changed, 109 insertions(+), 80 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1109d3b..b4b9474 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -113,6 +113,11 @@ struct l2tp_net {
 	spinlock_t l2tp_session_hlist_lock;
 };
 
+struct l2tp_udp_tunnel {
+	struct udp_tunnel_sock uts;
+	struct l2tp_tunnel tunnel;
+};
+
 static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 
 static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
@@ -1334,14 +1339,47 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
 		if (sock)
 			inet_shutdown(sock, 2);
 	} else {
+		struct l2tp_udp_tunnel *udp_tunnel;
+
+		udp_tunnel = container_of(tunnel,
+					  struct l2tp_udp_tunnel, tunnel);
+
 		if (sock)
-			kernel_sock_shutdown(sock, SHUT_RDWR);
-		sk_release_kernel(sk);
+			udp_tunnel_sock_release(&udp_tunnel->uts);
 	}
 
 	l2tp_tunnel_sock_put(sk);
 }
 
+static void l2tp_tunnel_udp_port_conf(struct l2tp_tunnel_cfg *cfg,
+				      struct udp_port_cfg *udp_conf)
+{
+	memset(udp_conf, 0, sizeof(*udp_conf));
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (cfg->local_ip6 && cfg->peer_ip6) {
+		udp_conf->family = AF_INET6;
+		memcpy(&udp_conf->local_ip6, cfg->local_ip6,
+				sizeof(udp_conf->local_ip6));
+		memcpy(&udp_conf->peer_ip6, cfg->peer_ip6,
+				sizeof(udp_conf->peer_ip6));
+		udp_conf->use_udp6_tx_checksums =
+			cfg->udp6_zero_tx_checksums;
+		udp_conf->use_udp6_rx_checksums =
+			cfg->udp6_zero_rx_checksums;
+	} else
+#endif
+	{
+		udp_conf->family = AF_INET;
+		udp_conf->local_ip = cfg->local_ip;
+		udp_conf->peer_ip = cfg->peer_ip;
+		udp_conf->use_udp_checksums = cfg->use_udp_checksums;
+	}
+
+	udp_conf->local_udp_port = htons(cfg->local_udp_port);
+	udp_conf->peer_udp_port = htons(cfg->peer_udp_port);
+}
+
 /* Create a socket for the tunnel, if one isn't set up by
  * userspace. This is used for static tunnels where there is no
  * managing L2TP daemon.
@@ -1363,31 +1401,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
 
 	switch (cfg->encap) {
 	case L2TP_ENCAPTYPE_UDP:
-		memset(&udp_conf, 0, sizeof(udp_conf));
-
-#if IS_ENABLED(CONFIG_IPV6)
-		if (cfg->local_ip6 && cfg->peer_ip6) {
-			udp_conf.family = AF_INET6;
-			memcpy(&udp_conf.local_ip6, cfg->local_ip6,
-			       sizeof(udp_conf.local_ip6));
-			memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
-			       sizeof(udp_conf.peer_ip6));
-			udp_conf.use_udp6_tx_checksums =
-			    cfg->udp6_zero_tx_checksums;
-			udp_conf.use_udp6_rx_checksums =
-			    cfg->udp6_zero_rx_checksums;
-		} else
-#endif
-		{
-			udp_conf.family = AF_INET;
-			udp_conf.local_ip = cfg->local_ip;
-			udp_conf.peer_ip = cfg->peer_ip;
-			udp_conf.use_udp_checksums = cfg->use_udp_checksums;
-		}
-
-		udp_conf.local_udp_port = htons(cfg->local_udp_port);
-		udp_conf.peer_udp_port = htons(cfg->peer_udp_port);
-
+		l2tp_tunnel_udp_port_conf(cfg, &udp_conf);
 		err = udp_sock_create(net, &udp_conf, &sock);
 		if (err < 0)
 			goto out;
@@ -1471,6 +1485,31 @@ out:
 
 static struct lock_class_key l2tp_socket_class;
 
+static int l2tp_sk_sanity_check(struct sock *sk, enum l2tp_encap_type encap,
+				u32 tunnel_id, int fd)
+{
+	unsigned int expected_protocol;
+
+	switch (encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		expected_protocol = IPPROTO_UDP;
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		expected_protocol = IPPROTO_L2TP;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	if (sk->sk_protocol != expected_protocol) {
+		pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+		       tunnel_id, fd, sk->sk_protocol, expected_protocol);
+		return -EPROTONOSUPPORT;
+	}
+
+	return 0;
+}
+
 int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
 {
 	struct l2tp_tunnel *tunnel = NULL;
@@ -1478,7 +1517,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	struct socket *sock = NULL;
 	struct sock *sk = NULL;
 	struct l2tp_net *pn;
-	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
+	enum l2tp_encap_type encap = cfg ? cfg->encap : L2TP_ENCAPTYPE_UDP;
 
 	/* Get the tunnel socket from the fd, which was opened by
 	 * the userspace L2TP daemon. If not specified, create a
@@ -1486,9 +1525,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	 */
 	if (fd < 0) {
 		err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
-				cfg, &sock);
+					      cfg, &sock);
 		if (err < 0)
 			goto err;
+
+		sk = sock->sk;
 	} else {
 		sock = sockfd_lookup(fd, &err);
 		if (!sock) {
@@ -1498,58 +1539,66 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 			goto err;
 		}
 
+		sk = sock->sk;
+
 		/* Reject namespace mismatches */
-		if (!net_eq(sock_net(sock->sk), net)) {
+		if (!net_eq(sock_net(sk), net)) {
 			pr_err("tunl %u: netns mismatch\n", tunnel_id);
 			err = -EINVAL;
 			goto err;
 		}
+
+		/* Quick sanity checks */
+		err = l2tp_sk_sanity_check(sk, encap, tunnel_id, fd);
+		if (err)
+			goto err;
+
+		/* Check if this socket has already been prepped */
+		tunnel = l2tp_tunnel(sk);
+		if (tunnel != NULL) {
+			/* This socket has already been prepped */
+			err = -EBUSY;
+			goto err;
+		}
 	}
 
-	sk = sock->sk;
+	switch(encap) {
+	case L2TP_ENCAPTYPE_UDP: {
+		struct udp_tunnel_socket_cfg l2tp_udp_cfg;
+		struct l2tp_udp_tunnel *udp_tunnel;
 
-	if (cfg != NULL)
-		encap = cfg->encap;
+		l2tp_tunnel_udp_port_conf(cfg, &l2tp_udp_cfg.port);
+		l2tp_udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+		l2tp_udp_cfg.encap_rcv = l2tp_udp_encap_recv;
+		l2tp_udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
 
-	/* Quick sanity checks */
-	switch (encap) {
-	case L2TP_ENCAPTYPE_UDP:
-		err = -EPROTONOSUPPORT;
-		if (sk->sk_protocol != IPPROTO_UDP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+		udp_tunnel = (struct l2tp_udp_tunnel *)
+			create_udp_tunnel_sock(net, sizeof(*udp_tunnel),
+					       sock, &l2tp_udp_cfg);
+
+		if (!udp_tunnel) {
+			err = -ENOMEM;
 			goto err;
 		}
+
+		tunnel = &udp_tunnel->tunnel;
 		break;
+	}
 	case L2TP_ENCAPTYPE_IP:
-		err = -EPROTONOSUPPORT;
-		if (sk->sk_protocol != IPPROTO_L2TP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+		tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
+		if (tunnel == NULL) {
+			err = -ENOMEM;
 			goto err;
 		}
-		break;
-	}
-
-	/* Check if this socket has already been prepped */
-	tunnel = l2tp_tunnel(sk);
-	if (tunnel != NULL) {
-		/* This socket has already been prepped */
-		err = -EBUSY;
-		goto err;
 	}
 
-	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
-	if (tunnel == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
+	rcu_assign_sk_user_data(sk, tunnel);
 
+	tunnel->encap = encap;
 	tunnel->version = version;
 	tunnel->tunnel_id = tunnel_id;
 	tunnel->peer_tunnel_id = peer_tunnel_id;
-	tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
-
+	tunnel->debug = cfg ? cfg->debug : L2TP_DEFAULT_DEBUG_FLAGS;
 	tunnel->magic = L2TP_TUNNEL_MAGIC;
 	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
 	rwlock_init(&tunnel->hlist_lock);
@@ -1558,9 +1607,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
-	if (cfg != NULL)
-		tunnel->debug = cfg->debug;
-
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1579,23 +1625,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	}
 #endif
 
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	tunnel->encap = encap;
-	if (encap == L2TP_ENCAPTYPE_UDP) {
-		/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-		udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
-		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
-		udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
-			udpv6_encap_enable();
-		else
-#endif
-		udp_encap_enable();
-	}
-
-	sk->sk_user_data = tunnel;
-
 	/* Hook on the tunnel socket destructor so that we can cleanup
 	 * if the tunnel socket goes away.
 	 */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
  2014-08-27  3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
  2014-08-27  3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
  2014-08-27  3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
@ 2014-08-27  3:54 ` Eric Dumazet
  2014-08-27  5:01   ` Andy Zhou
  2014-08-27  4:12 ` Tom Herbert
  3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2014-08-27  3:54 UTC (permalink / raw)
  To: Andy Zhou; +Cc: davem, netdev

On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote:
> Added create_udp_tunnel_socket(), packet receive and transmit,  and
> other related common functions for UDP tunnels.
> 
> Per net open UDP tunnel ports are tracked in this common layer to
> prevent sharing of a single port with more than one UDP tunnel.
> 
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---

I see some RCU code, but I do not see elementary rules of RCU being
respected in this patch.

Changelogs are a bit terse, and there is not a single comment in this
code.

What is going on exactly ?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
  2014-08-27  3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
@ 2014-08-27  5:01   ` Andy Zhou
  2014-08-27 11:45     ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  5:01 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev@vger.kernel.org

On Tue, Aug 26, 2014 at 8:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote:
>> Added create_udp_tunnel_socket(), packet receive and transmit,  and
>> other related common functions for UDP tunnels.
>>
>> Per net open UDP tunnel ports are tracked in this common layer to
>> prevent sharing of a single port with more than one UDP tunnel.
>>
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>
> I see some RCU code, but I do not see elementary rules of RCU being
> respected in this patch.

Which RCU rule are you referring to?

>
> Changelogs are a bit terse, and there is not a single comment in this
> code.

I can add more content and comments in the next patch.

> What is going on exactly ?
>

I am trying to refactor some UDP tunnel code so that we don't end up
duplicate a lot of code when adding new UDP based tunnel protocols.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
  2014-08-27  5:01   ` Andy Zhou
@ 2014-08-27 11:45     ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2014-08-27 11:45 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, netdev@vger.kernel.org

On Tue, 2014-08-26 at 22:01 -0700, Andy Zhou wrote:

> Which RCU rule are you referring to?

The most elementary ones, like observing rcu grace period before
freeing ?

udp_tunnel_sock_release() is obviously wrong, or needs an appropriate
documentation.

Please carefully read Documentation/RCU/checklist.txt and tell us why
you believe your code is correct, either in the changelog or using
comments.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
  2014-08-27  3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
                   ` (2 preceding siblings ...)
  2014-08-27  3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
@ 2014-08-27  4:12 ` Tom Herbert
  2014-08-27  5:17   ` Andy Zhou
  3 siblings, 1 reply; 10+ messages in thread
From: Tom Herbert @ 2014-08-27  4:12 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, Linux Netdev List

On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
> Added create_udp_tunnel_socket(), packet receive and transmit,  and
> other related common functions for UDP tunnels.
>
> Per net open UDP tunnel ports are tracked in this common layer to
> prevent sharing of a single port with more than one UDP tunnel.
>
This is not needed! If a UDP port is already bound (whether by another
tunnel or not), then bind during tunnel initialization will fail. All
this logic to store tunnel sockets in a separate list seems like
unnecessary complexity. If a driver needs to track multiple ports it
opens, it can do that on its own like VXLAN is already doing.

> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
>  include/net/udp_tunnel.h |   50 +++++++++++
>  net/ipv4/udp_tunnel.c    |  216 +++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 265 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> index ffd69cb..5ff0746 100644
> --- a/include/net/udp_tunnel.h
> +++ b/include/net/udp_tunnel.h
> @@ -1,6 +1,8 @@
>  #ifndef __NET_UDP_TUNNEL_H
>  #define __NET_UDP_TUNNEL_H
>
> +#include <net/ip_tunnels.h>
> +
>  struct udp_port_cfg {
>         u8                      family;
>
> @@ -26,7 +28,55 @@ struct udp_port_cfg {
>                                 use_udp6_rx_checksums:1;
>  };
>
> +struct udp_tunnel_sock;
> +
> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
> +
> +struct udp_tunnel_socket_cfg {
> +       struct udp_port_cfg port;
> +
> +       /* Used for setting up udp_sock fields, see udp.h for details */
> +       __u8  encap_type;
> +       udp_tunnel_encap_rcv_t encap_rcv;
> +       udp_tunnel_encap_destroy_t encap_destroy;
> +};
> +
> +struct udp_tunnel_sock {
> +       struct hlist_node hlist;
> +       struct socket *sock;
> +};
> +
>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                     struct socket **sockp);
>
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> +                                              struct socket *sock,
> +                                              struct udp_tunnel_socket_cfg
> +                                                       *socket_cfg);
> +
> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
> +
> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
> +               struct sk_buff *skb, struct net_device *dev,
> +               struct in6_addr *saddr, struct in6_addr *daddr,
> +               __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
> +
> +#endif
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
> +
> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
> +                                                        bool udp_csum)
> +{
> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> +
> +       return iptunnel_handle_offloads(skb, udp_csum, type);
> +}
>  #endif
> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
> index 61ec1a6..3ed66bc 100644
> --- a/net/ipv4/udp_tunnel.c
> +++ b/net/ipv4/udp_tunnel.c
> @@ -7,6 +7,23 @@
>  #include <net/udp.h>
>  #include <net/udp_tunnel.h>
>  #include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +#include <net/addrconf.h>
> +#include <net/ip6_tunnel.h>
> +#include <net/ip6_checksum.h>
> +#endif
> +
> +#define PORT_HASH_BITS 8
> +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
> +
> +static int udp_tunnel_net_id;
> +
> +struct udp_tunnel_net {
> +       struct hlist_head sock_list[PORT_HASH_SIZE];
> +       spinlock_t  sock_lock;   /* Protecting the sock_list */
> +};
>
>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                     struct socket **sockp)
> @@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                 return -EPFNOSUPPORT;
>         }
>
> -
>         *sockp = sock;
>
>         return 0;
> @@ -97,4 +113,202 @@ error:
>  }
>  EXPORT_SYMBOL(udp_sock_create);
>
> +
> +/* Socket hash table head */
> +static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
> +{
> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +
> +       return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
> +}
> +
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> +                                              struct socket *sock,
> +                                              struct udp_tunnel_socket_cfg
> +                                                     *cfg)
> +{
> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +       struct udp_tunnel_sock *uts;
> +       struct sock *sk;
> +       const __be16 port = cfg->port.local_udp_port;
> +       const int ipv6 = (cfg->port.family == AF_INET6);
> +       int err;
> +
> +       if (!sock)
> +               err = udp_sock_create(net, &cfg->port, &sock);
> +       else
> +               err = (sock->sk->sk_protocol == IPPROTO_UDP) ?
> +                       0 : -EPROTONOSUPPORT;
> +
> +       if (err)
> +               return NULL;
> +
> +       uts = kzalloc(size, GFP_KERNEL);
> +       if (!uts)
> +               return ERR_PTR(-ENOMEM);
> +
> +       sk = sock->sk;
> +
> +       /* Disable multicast loopback */
> +       inet_sk(sk)->mc_loop = 0;
> +
> +       rcu_assign_sk_user_data(sk, uts);
> +
> +       udp_sk(sk)->encap_type = cfg->encap_type;
> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
> +       udp_sk(sk)->encap_destroy = cfg->encap_destroy;
> +
> +       uts->sock = sock;
> +
> +       spin_lock(&utn->sock_lock);
> +       hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
> +       spin_unlock(&utn->sock_lock);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +       if (ipv6)
> +               ipv6_stub->udpv6_encap_enable();
> +       else
> +#endif
> +               udp_encap_enable();
> +
> +       return uts;
> +}
> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
> +
> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet)
> +{
> +       struct udphdr *uh;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +       uh->len = htons(skb->len);
> +
> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
> +
> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> +                            tos, ttl, df, xnet);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
> +                        struct sk_buff *skb, struct net_device *dev,
> +                        struct in6_addr *saddr, struct in6_addr *daddr,
> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
> +{
> +       struct udphdr *uh;
> +       struct ipv6hdr *ip6h;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +
> +       uh->len = htons(skb->len);
> +       uh->check = 0;
> +
> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
> +                           | IPSKB_REROUTED);
> +       skb_dst_set(skb, dst);
> +
> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
> +
> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
> +                               IPPROTO_UDP, csum);
> +               if (uh->check == 0)
> +                       uh->check = CSUM_MANGLED_0;
> +       } else {
> +               skb->ip_summed = CHECKSUM_PARTIAL;
> +               skb->csum_start = skb_transport_header(skb) - skb->head;
> +               skb->csum_offset = offsetof(struct udphdr, check);
> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
> +                               skb->len, IPPROTO_UDP, 0);
> +       }
> +
> +       __skb_push(skb, sizeof(*ip6h));
> +       skb_reset_network_header(skb);
> +       ip6h              = ipv6_hdr(skb);
> +       ip6h->version     = 6;
> +       ip6h->priority    = prio;
> +       ip6h->flow_lbl[0] = 0;
> +       ip6h->flow_lbl[1] = 0;
> +       ip6h->flow_lbl[2] = 0;
> +       ip6h->payload_len = htons(skb->len);
> +       ip6h->nexthdr     = IPPROTO_UDP;
> +       ip6h->hop_limit   = ttl;
> +       ip6h->daddr       = *daddr;
> +       ip6h->saddr       = *saddr;
> +
> +       ip6tunnel_xmit(skb, dev);
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
> +#endif
> +
> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
> +{
> +       struct udp_tunnel_sock *uts;
> +
> +       hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) {
> +               if (inet_sk(uts->sock->sk)->inet_sport == port)
> +                       return uts;
> +       }
> +
> +       return NULL;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
> +{
> +       struct sock *sk = uts->sock->sk;
> +       struct net *net = sock_net(sk);
> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +
> +       spin_lock(&utn->sock_lock);
> +       hlist_del_rcu(&uts->hlist);
> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
> +       kernel_sock_shutdown(uts->sock, SHUT_RDWR);
> +       sk_release_kernel(sk);
> +       spin_unlock(&utn->sock_lock);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
> +
> +static int __net_init udp_tunnel_init_net(struct net *net)
> +{
> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +       unsigned int h;
> +
> +       spin_lock_init(&utn->sock_lock);
> +
> +       for (h = 0; h < PORT_HASH_SIZE; h++)
> +               INIT_HLIST_HEAD(&utn->sock_list[h]);
> +
> +       return 0;
> +}
> +
> +static struct pernet_operations udp_tunnel_net_ops = {
> +       .init = udp_tunnel_init_net,
> +       .exit = NULL,
> +       .id = &udp_tunnel_net_id,
> +       .size = sizeof(struct udp_tunnel_net),
> +};
> +
> +static int __init udp_tunnel_init(void)
> +{
> +       return register_pernet_subsys(&udp_tunnel_net_ops);
> +}
> +late_initcall(udp_tunnel_init);
> +
>  MODULE_LICENSE("GPL");
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
  2014-08-27  4:12 ` Tom Herbert
@ 2014-08-27  5:17   ` Andy Zhou
  0 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27  5:17 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List

On Tue, Aug 26, 2014 at 9:12 PM, Tom Herbert <therbert@google.com> wrote:
> On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
>> Added create_udp_tunnel_socket(), packet receive and transmit,  and
>> other related common functions for UDP tunnels.
>>
>> Per net open UDP tunnel ports are tracked in this common layer to
>> prevent sharing of a single port with more than one UDP tunnel.
>>
> This is not needed! If a UDP port is already bound (whether by another
> tunnel or not), then bind during tunnel initialization will fail. All
> this logic to store tunnel sockets in a separate list seems like
> unnecessary complexity. If a driver needs to track multiple ports it
> opens, it can do that on its own like VXLAN is already doing.

I was hoping to retain the udp_tunnel_find_sock() function which seems
to be common.
But it is not doing much at the moment, so I can just drop it in the
next version.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-08-27 11:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-27  3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
2014-08-27  3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
2014-08-27  4:15   ` Tom Herbert
     [not found]   ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
2014-08-27  5:19     ` Andy Zhou
2014-08-27  3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
2014-08-27  3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
2014-08-27  5:01   ` Andy Zhou
2014-08-27 11:45     ` Eric Dumazet
2014-08-27  4:12 ` Tom Herbert
2014-08-27  5:17   ` Andy Zhou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).