* [net-next v3 1/3] udp: Expand UDP tunnel common APIs
@ 2014-08-27 3:35 Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw)
To: davem; +Cc: netdev, Andy Zhou
Added create_udp_tunnel_sock(), packet receive and transmit helpers, and
other related common functions for UDP tunnels.
Open UDP tunnel ports are tracked per network namespace in this common
layer to prevent a single port from being shared by more than one UDP tunnel.
Signed-off-by: Andy Zhou <azhou@nicira.com>
---
include/net/udp_tunnel.h | 50 +++++++++++
net/ipv4/udp_tunnel.c | 216 +++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 265 insertions(+), 1 deletion(-)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cb..5ff0746 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,8 @@
#ifndef __NET_UDP_TUNNEL_H
#define __NET_UDP_TUNNEL_H
+#include <net/ip_tunnels.h>
+
struct udp_port_cfg {
u8 family;
@@ -26,7 +28,55 @@ struct udp_port_cfg {
use_udp6_rx_checksums:1;
};
+struct udp_tunnel_sock;
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_socket_cfg { /* Settings consumed by create_udp_tunnel_sock() */
+ struct udp_port_cfg port; /* Handed to udp_sock_create() when no socket is supplied */
+
+ /* Used for setting up udp_sock fields, see udp.h for details */
+ __u8 encap_type; /* Copied to udp_sk(sk)->encap_type */
+ udp_tunnel_encap_rcv_t encap_rcv; /* Copied to udp_sk(sk)->encap_rcv */
+ udp_tunnel_encap_destroy_t encap_destroy; /* Copied to udp_sk(sk)->encap_destroy */
+};
+
+struct udp_tunnel_sock { /* Common per-socket state for a UDP tunnel endpoint */
+ struct hlist_node hlist; /* Entry in the per-net, port-hashed socket table */
+ struct socket *sock; /* UDP socket carrying the tunnel traffic */
+};
+
int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp);
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+ struct socket *sock,
+ struct udp_tunnel_socket_cfg
+ *socket_cfg);
+
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
+
+#endif
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
+
+/*
+ * Pick the UDP-tunnel GSO type that matches @udp_csum and pass @skb on to
+ * iptunnel_handle_offloads().  The return value is whatever that helper
+ * returns; callers in this series test it with IS_ERR().
+ */
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+							 bool udp_csum)
+{
+	int gso_type;
+
+	if (udp_csum)
+		gso_type = SKB_GSO_UDP_TUNNEL_CSUM;
+	else
+		gso_type = SKB_GSO_UDP_TUNNEL;
+
+	return iptunnel_handle_offloads(skb, udp_csum, gso_type);
+}
#endif
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 61ec1a6..3ed66bc 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -7,6 +7,23 @@
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+static int udp_tunnel_net_id;
+
+struct udp_tunnel_net { /* Per-network-namespace table of UDP tunnel sockets */
+ struct hlist_head sock_list[PORT_HASH_SIZE]; /* Buckets indexed by hashed UDP port */
+ spinlock_t sock_lock; /* Taken around sock_list insertions and removals */
+};
int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
return -EPFNOSUPPORT;
}
-
*sockp = sock;
return 0;
@@ -97,4 +113,202 @@ error:
}
EXPORT_SYMBOL(udp_sock_create);
+
+/* Socket hash table head */
+/*
+ * Return the bucket of @net's tunnel-socket hash table that @port (network
+ * byte order) maps to.
+ */
+static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
+{
+	const unsigned int bucket = hash_32(ntohs(port), PORT_HASH_BITS);
+	struct udp_tunnel_net *utn;
+
+	utn = net_generic(net, udp_tunnel_net_id);
+	return &utn->sock_list[bucket];
+}
+
+/*
+ * create_udp_tunnel_sock - turn a UDP socket into a tunnel endpoint
+ * @net:  namespace whose tunnel-socket table receives the new entry
+ * @size: number of bytes to allocate for the returned object; must be at
+ *        least sizeof(struct udp_tunnel_sock).  A larger value lets the
+ *        caller embed struct udp_tunnel_sock at the start of its own
+ *        structure.
+ * @sock: socket to use, or NULL to open one with udp_sock_create() from
+ *        @cfg->port
+ * @cfg:  port configuration plus the encap settings copied into the
+ *        socket's udp_sock
+ *
+ * Returns the zero-initialised tunnel socket on success and NULL on every
+ * failure (socket creation failed, @sock is not a UDP socket, @size is too
+ * small, or the allocation failed).  A socket opened here is released again
+ * on failure; a caller-supplied @sock is not modified on failure.
+ */
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+					       struct socket *sock,
+					       struct udp_tunnel_socket_cfg
+					       *cfg)
+{
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+	const __be16 port = cfg->port.local_udp_port;
+	struct udp_tunnel_sock *uts;
+	bool created = false;
+	struct sock *sk;
+
+	/* The allocation has to be able to hold the common part. */
+	if (size < sizeof(*uts))
+		return NULL;
+
+	if (!sock) {
+		if (udp_sock_create(net, &cfg->port, &sock))
+			return NULL;
+		created = true;
+	} else if (sock->sk->sk_protocol != IPPROTO_UDP) {
+		return NULL;
+	}
+
+	uts = kzalloc(size, GFP_KERNEL);
+	if (!uts) {
+		/* Do not leak the socket that was opened above. */
+		if (created) {
+			kernel_sock_shutdown(sock, SHUT_RDWR);
+			sk_release_kernel(sock->sk);
+		}
+		return NULL;
+	}
+
+	sk = sock->sk;
+
+	/* Fill in the common part before anything else can reach it. */
+	uts->sock = sock;
+
+	/* Disable multicast loopback */
+	inet_sk(sk)->mc_loop = 0;
+
+	rcu_assign_sk_user_data(sk, uts);
+
+	udp_sk(sk)->encap_type = cfg->encap_type;
+	udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+	udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+	spin_lock(&utn->sock_lock);
+	hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
+	spin_unlock(&utn->sock_lock);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (cfg->port.family == AF_INET6)
+		ipv6_stub->udpv6_encap_enable();
+	else
+#endif
+		udp_encap_enable();
+
+	return uts;
+}
+EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
+
+/*
+ * Prepend a UDP header to @skb, let udp_set_csum() fill in the checksum
+ * according to the socket's sk_no_check_tx setting, and pass the packet to
+ * iptunnel_xmit().  Returns the value returned by iptunnel_xmit().
+ */
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet)
+{
+	struct udphdr *udph;
+
+	/* Make room for the UDP header and mark it as the transport header. */
+	__skb_push(skb, sizeof(struct udphdr));
+	skb_reset_transport_header(skb);
+
+	udph = udp_hdr(skb);
+	udph->dest = dst_port;
+	udph->source = src_port;
+	/* skb->len now includes the UDP header pushed above. */
+	udph->len = htons(skb->len);
+
+	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+			     tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * udp_tunnel6_xmit_skb - encapsulate @skb in UDP/IPv6 and transmit it
+ * @sock:     tunnel socket; its no-check6-tx setting decides whether a UDP
+ *            checksum is generated
+ * @dst:      route to attach to @skb
+ * @skb:      packet to send; a UDP and an IPv6 header are pushed in front
+ * @dev:      device passed to ip6tunnel_xmit()
+ * @saddr:    IPv6 source address
+ * @daddr:    IPv6 destination address
+ * @prio:     value stored in the IPv6 priority field
+ * @ttl:      value stored in the IPv6 hop limit field
+ * @src_port: UDP source port (network byte order)
+ * @dst_port: UDP destination port (network byte order)
+ *
+ * Always returns 0.
+ */
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+			 struct sk_buff *skb, struct net_device *dev,
+			 struct in6_addr *saddr, struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+
+	/* skb->len now includes the UDP header pushed above. */
+	uh->len = htons(skb->len);
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+			      | IPSKB_REROUTED);
+	skb_dst_set(skb, dst);
+
+	/*
+	 * Use the common helper rather than open-coding the checksum so that
+	 * the socket's no-check6-tx setting is honoured, as the VXLAN code
+	 * this function replaces did.  The route is attached to the skb
+	 * first, in the same order the VXLAN code used.
+	 */
+	udp6_set_csum(udp_get_no_check6_tx(sock->sk), skb,
+		      saddr, daddr, skb->len);
+
+	__skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+	ip6h = ipv6_hdr(skb);
+	ip6h->version = 6;
+	ip6h->priority = prio;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->payload_len = htons(skb->len);
+	ip6h->nexthdr = IPPROTO_UDP;
+	ip6h->hop_limit = ttl;
+	ip6h->daddr = *daddr;
+	ip6h->saddr = *saddr;
+
+	ip6tunnel_xmit(skb, dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+#endif
+
+/*
+ * Look up the tunnel socket in @net whose local UDP port equals @port
+ * (network byte order).  Returns the first match, or NULL if there is none.
+ *
+ * NOTE(review): the list is walked with the RCU iterator, so callers
+ * presumably hold rcu_read_lock() or otherwise keep the entry alive -
+ * confirm against the callers.
+ */
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
+{
+	struct hlist_head *bucket = uts_head(net, port);
+	struct udp_tunnel_sock *pos;
+
+	hlist_for_each_entry_rcu(pos, bucket, hlist) {
+		if (inet_sk(pos->sock->sk)->inet_sport != port)
+			continue;
+		return pos;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
+
+/*
+ * udp_tunnel_sock_release - unhash a tunnel socket and release its socket
+ * @uts: tunnel socket returned by create_udp_tunnel_sock()
+ *
+ * Removes @uts from the per-namespace hash table, clears the socket's
+ * sk_user_data, then shuts down and releases the underlying socket.  The
+ * memory of @uts itself is not freed here; that is left to the caller.
+ */
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
+{
+	struct socket *sock = uts->sock;
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+
+	/* The lock only protects the hash list, so hold it only for that. */
+	spin_lock(&utn->sock_lock);
+	hlist_del_rcu(&uts->hlist);
+	spin_unlock(&utn->sock_lock);
+
+	rcu_assign_sk_user_data(sk, NULL);
+
+	/*
+	 * Socket shutdown and release may sleep, so they must run after the
+	 * spinlock has been dropped.
+	 */
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sk_release_kernel(sk);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+
+/*
+ * Per-namespace constructor: empty every hash bucket of the tunnel-socket
+ * table and initialise the lock that guards it.  Always returns 0.
+ */
+static int __net_init udp_tunnel_init_net(struct net *net)
+{
+	struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+	struct hlist_head *bucket = utn->sock_list;
+	struct hlist_head *end = bucket + PORT_HASH_SIZE;
+
+	for (; bucket < end; bucket++)
+		INIT_HLIST_HEAD(bucket);
+
+	spin_lock_init(&utn->sock_lock);
+
+	return 0;
+}
+
+static struct pernet_operations udp_tunnel_net_ops = { /* Per-namespace hooks */
+ .init = udp_tunnel_init_net, /* Prepares the lock and empty hash buckets */
+ .exit = NULL, /* No per-namespace teardown hook is registered */
+ .id = &udp_tunnel_net_id, /* Key later passed to net_generic() */
+ .size = sizeof(struct udp_tunnel_net), /* Size of the per-namespace state */
+};
+
+/*
+ * Register the per-namespace tunnel-socket state.  Runs as a late initcall
+ * and returns the result of register_pernet_subsys().
+ */
+static int __init udp_tunnel_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&udp_tunnel_net_ops);
+	return err;
+}
+late_initcall(udp_tunnel_init);
+
MODULE_LICENSE("GPL");
--
1.7.9.5
^ permalink raw reply related [flat|nested] 10+ messages in thread* [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions. 2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou @ 2014-08-27 3:35 ` Andy Zhou 2014-08-27 4:15 ` Tom Herbert [not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com> 2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou ` (2 subsequent siblings) 3 siblings, 2 replies; 10+ messages in thread From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw) To: davem; +Cc: netdev, Andy Zhou Signed-off-by: Andy Zhou <azhou@nicira.com> --- drivers/net/vxlan.c | 199 ++++++++++++----------------------------- include/net/vxlan.h | 16 ++-- net/openvswitch/vport-vxlan.c | 6 +- 3 files changed, 68 insertions(+), 153 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index beb377b..f1f1c48 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -42,6 +42,7 @@ #include <net/netns/generic.h> #include <net/vxlan.h> #include <net/protocol.h> +#include <net/udp_tunnel.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/addrconf.h> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) /* Find VXLAN socket based on network namespace and UDP port */ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) { - struct vxlan_sock *vs; - - hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port) - return vs; - } - return NULL; + return (struct vxlan_sock *)udp_tunnel_find_sock(net, port); } static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) { struct net_device *dev; - struct sock *sk = vs->sock->sk; + struct sock *sk = vs->uts.sock->sk; struct net *net = 
sock_net(sk); sa_family_t sa_family = sk->sk_family; __be16 port = inet_sk(sk)->inet_sport; @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) { struct net_device *dev; - struct sock *sk = vs->sock->sk; + struct sock *sk = vs->uts.sock->sk; struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; __be16 port = inet_sk(sk)->inet_sport; @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs) void vxlan_sock_release(struct vxlan_sock *vs) { - struct sock *sk = vs->sock->sk; + struct sock *sk = vs->uts.sock->sk; struct net *net = sock_net(sk); struct vxlan_net *vn = net_generic(net, vxlan_net_id); @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs) spin_lock(&vn->sock_lock); hlist_del_rcu(&vs->hlist); - rcu_assign_sk_user_data(vs->sock->sk, NULL); vxlan_notify_del_rx_port(vs); spin_unlock(&vn->sock_lock); @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work) { struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join); struct vxlan_sock *vs = vxlan->vn_sock; - struct sock *sk = vs->sock->sk; + struct sock *sk = vs->uts.sock->sk; union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work) { struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave); struct vxlan_sock *vs = vxlan->vn_sock; - struct sock *sk = vs->sock->sk; + struct sock *sk = vs->uts.sock->sk; union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; @@ -1338,7 +1332,6 @@ out: } #if IS_ENABLED(CONFIG_IPV6) - static struct sk_buff *vxlan_na_create(struct sk_buff *request, struct neighbour *n, bool isrouter) { @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static inline struct sk_buff 
*vxlan_handle_offloads(struct sk_buff *skb, - bool udp_csum) -{ - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - return iptunnel_handle_offloads(skb, udp_csum, type); -} - #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) { - struct ipv6hdr *ip6h; struct vxlanhdr *vxh; - struct udphdr *uh; int min_headroom; int err; - skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk)); + skb = udp_tunnel_handle_offloads(skb, + !udp_get_no_check6_tx( + vs->uts.sock->sk)); if (IS_ERR(skb)) return -EINVAL; @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - - uh->dest = dst_port; - uh->source = src_port; - - uh->len = htons(skb->len); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_set(skb, dst); - - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb, - saddr, daddr, skb->len); - - __skb_push(skb, sizeof(*ip6h)); - skb_reset_network_header(skb); - ip6h = ipv6_hdr(skb); - ip6h->version = 6; - ip6h->priority = prio; - ip6h->flow_lbl[0] = 0; - ip6h->flow_lbl[1] = 0; - ip6h->flow_lbl[2] = 0; - ip6h->payload_len = htons(skb->len); - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = *saddr; - - ip6tunnel_xmit(skb, dev); + udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio, + ttl, src_port, dst_port); return 0; } #endif @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) { struct vxlanhdr *vxh; - struct udphdr *uh; int min_headroom; int err; - 
skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, + !vs->uts.sock->sk->sk_no_check_tx); if (IS_ERR(skb)) return -EINVAL; @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - - uh->dest = dst_port; - uh->source = src_port; - - uh->len = htons(skb->len); - - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb, - src, dst, skb->len); - - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, xnet); + return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos, + ttl, df, src_port, dst_port, xnet); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, - fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, - htonl(vni << 8), - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb, + fl4.saddr, dst->sin.sin_addr.s_addr, + tos, ttl, df, src_port, dst_port, + !net_eq(vxlan->net, + dev_net(vxlan->dev))); if (err < 0) goto rt_tx_error; iptunnel_xmit_stats(err, &dev->stats, dev->tstats); #if IS_ENABLED(CONFIG_IPV6) } else { - struct sock *sk = vxlan->vn_sock->sock->sk; + struct sock *sk = vxlan->vn_sock->uts.sock->sk; struct dst_entry *ndst; struct flowi6 fl6; u32 flags; @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev) spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { - port = inet_sk(vs->sock->sk)->inet_sport; - sa_family = vs->sock->sk->sk_family; + port = inet_sk(vs->uts.sock->sk)->inet_sport; + sa_family = vs->uts.sock->sk->sk_family; 
dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family, port); } @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = { static void vxlan_del_work(struct work_struct *work) { struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); - - sk_release_kernel(vs->sock->sk); + udp_tunnel_sock_release(&vs->uts); kfree_rcu(vs, rcu); } -static struct socket *vxlan_create_sock(struct net *net, bool ipv6, - __be16 port, u32 flags) +/* Create new listen socket if needed */ +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, + vxlan_rcv_t rcv, void *data, + u32 flags) { - struct socket *sock; - struct udp_port_cfg udp_conf; - int err; + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_sock *vs; + struct udp_tunnel_socket_cfg vxlan_ts_cfg; + bool ipv6 = !!(flags & VXLAN_F_IPV6); + unsigned int h; - memset(&udp_conf, 0, sizeof(udp_conf)); + memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg)); if (ipv6) { - udp_conf.family = AF_INET6; - udp_conf.use_udp6_tx_checksums = + vxlan_ts_cfg.port.family = AF_INET6; + vxlan_ts_cfg.port.use_udp6_tx_checksums = !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); - udp_conf.use_udp6_rx_checksums = + vxlan_ts_cfg.port.use_udp6_rx_checksums = !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { - udp_conf.family = AF_INET; - udp_conf.local_ip.s_addr = INADDR_ANY; - udp_conf.use_udp_checksums = + vxlan_ts_cfg.port.family = AF_INET; + vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY; + vxlan_ts_cfg.port.use_udp_checksums = !!(flags & VXLAN_F_UDP_CSUM); } - udp_conf.local_udp_port = port; + vxlan_ts_cfg.port.local_udp_port = port; + vxlan_ts_cfg.encap_type = 1; + vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv; + vxlan_ts_cfg.encap_destroy = NULL; - /* Open UDP socket */ - err = udp_sock_create(net, &udp_conf, &sock); - if (err < 0) - return ERR_PTR(err); - - /* Disable multicast loopback */ - inet_sk(sock->sk)->mc_loop = 0; - - return sock; -} - -/* Create new listen socket if 
needed */ -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data, - u32 flags) -{ - struct vxlan_net *vn = net_generic(net, vxlan_net_id); - struct vxlan_sock *vs; - struct socket *sock; - struct sock *sk; - unsigned int h; - bool ipv6 = !!(flags & VXLAN_F_IPV6); - - vs = kzalloc(sizeof(*vs), GFP_KERNEL); + vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs), + NULL, + &vxlan_ts_cfg); if (!vs) return ERR_PTR(-ENOMEM); for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vs->vni_list[h]); - INIT_WORK(&vs->del_work, vxlan_del_work); + spin_lock(&vn->sock_lock); + list_add(&vs->next, &vn->vxlan_list); + spin_unlock(&vn->sock_lock); - sock = vxlan_create_sock(net, ipv6, port, flags); - if (IS_ERR(sock)) { - kfree(vs); - return ERR_CAST(sock); - } + INIT_WORK(&vs->del_work, vxlan_del_work); - vs->sock = sock; - sk = sock->sk; atomic_set(&vs->refcnt, 1); + vs->rcv = rcv; - vs->data = data; - rcu_assign_sk_user_data(vs->sock->sk, vs); + vs->rcv_data = data; /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; @@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, vxlan_notify_add_rx_port(vs); spin_unlock(&vn->sock_lock); - /* Mark socket as an encapsulation socket. */ - udp_sk(sk)->encap_type = 1; - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; -#if IS_ENABLED(CONFIG_IPV6) - if (ipv6) - ipv6_stub->udpv6_encap_enable(); - else -#endif - udp_encap_enable(); - return vs; } struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data, + vxlan_rcv_t rcv, void *data, bool no_share, u32 flags) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; vs = vxlan_socket_create(net, port, rcv, data, flags); @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, if (no_share) /* Return error if sharing is not allowed. 
*/ return vs; - spin_lock(&vn->sock_lock); vs = vxlan_find_sock(net, port); if (vs) { if (vs->rcv == rcv) @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, else vs = ERR_PTR(-EBUSY); } - spin_unlock(&vn->sock_lock); if (!vs) vs = ERR_PTR(-EINVAL); @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, } list_add(&vxlan->next, &vn->vxlan_list); - return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index d5f59f3..10bfc13 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -4,23 +4,27 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> +#include <net/udp_tunnel.h> #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); -/* per UDP socket information */ +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, + __be32 key); + +/* per vxlan socket information */ struct vxlan_sock { + struct udp_tunnel_sock uts; /* Must be the first member */ struct hlist_node hlist; - vxlan_rcv_t *rcv; - void *data; + struct list_head next; struct work_struct del_work; - struct socket *sock; struct rcu_head rcu; struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; + vxlan_rcv_t rcv; + void *rcv_data; struct udp_offload udp_offloads; }; @@ -35,7 +39,7 @@ struct vxlan_sock { #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data, + vxlan_rcv_t rcv, void *data, bool no_share, u32 flags); void vxlan_sock_release(struct vxlan_sock *vs); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index d8b7e24..7599efd 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff 
*skb, __be32 vx_vni) { struct ovs_key_ipv4_tunnel tun_key; - struct vport *vport = vs->data; + struct vport *vport = vs->rcv_data; struct iphdr *iph; __be64 key; @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) { struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; struct rtable *rt; struct flowi4 fl; __be16 src_port; -- 1.7.9.5 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions. 2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou @ 2014-08-27 4:15 ` Tom Herbert [not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com> 1 sibling, 0 replies; 10+ messages in thread From: Tom Herbert @ 2014-08-27 4:15 UTC (permalink / raw) To: Andy Zhou; +Cc: David Miller, Linux Netdev List On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote: > Signed-off-by: Andy Zhou <azhou@nicira.com> > --- > drivers/net/vxlan.c | 199 ++++++++++++----------------------------- > include/net/vxlan.h | 16 ++-- > net/openvswitch/vport-vxlan.c | 6 +- > 3 files changed, 68 insertions(+), 153 deletions(-) > > diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c > index beb377b..f1f1c48 100644 > --- a/drivers/net/vxlan.c > +++ b/drivers/net/vxlan.c > @@ -42,6 +42,7 @@ > #include <net/netns/generic.h> > #include <net/vxlan.h> > #include <net/protocol.h> > +#include <net/udp_tunnel.h> > #if IS_ENABLED(CONFIG_IPV6) > #include <net/ipv6.h> > #include <net/addrconf.h> > @@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) > /* Find VXLAN socket based on network namespace and UDP port */ > static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) > { > - struct vxlan_sock *vs; > - > - hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { > - if (inet_sk(vs->sock->sk)->inet_sport == port) > - return vs; > - } > - return NULL; > + return (struct vxlan_sock *)udp_tunnel_find_sock(net, port); It seems incorrect to assume that the socket returned is vxlan, presumably this could be some other type of tunnel socket. 
> } > > static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) > @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) > static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) > { > struct net_device *dev; > - struct sock *sk = vs->sock->sk; > + struct sock *sk = vs->uts.sock->sk; > struct net *net = sock_net(sk); > sa_family_t sa_family = sk->sk_family; > __be16 port = inet_sk(sk)->inet_sport; > @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) > static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) > { > struct net_device *dev; > - struct sock *sk = vs->sock->sk; > + struct sock *sk = vs->uts.sock->sk; > struct net *net = sock_net(sk); > sa_family_t sa_family = sk->sk_family; > __be16 port = inet_sk(sk)->inet_sport; > @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs) > > void vxlan_sock_release(struct vxlan_sock *vs) > { > - struct sock *sk = vs->sock->sk; > + struct sock *sk = vs->uts.sock->sk; > struct net *net = sock_net(sk); > struct vxlan_net *vn = net_generic(net, vxlan_net_id); > > @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs) > > spin_lock(&vn->sock_lock); > hlist_del_rcu(&vs->hlist); > - rcu_assign_sk_user_data(vs->sock->sk, NULL); > vxlan_notify_del_rx_port(vs); > spin_unlock(&vn->sock_lock); > > @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work) > { > struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join); > struct vxlan_sock *vs = vxlan->vn_sock; > - struct sock *sk = vs->sock->sk; > + struct sock *sk = vs->uts.sock->sk; > union vxlan_addr *ip = &vxlan->default_dst.remote_ip; > int ifindex = vxlan->default_dst.remote_ifindex; > > @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work) > { > struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave); > struct vxlan_sock *vs = vxlan->vn_sock; > - struct sock *sk = vs->sock->sk; > + struct 
sock *sk = vs->uts.sock->sk; > union vxlan_addr *ip = &vxlan->default_dst.remote_ip; > int ifindex = vxlan->default_dst.remote_ifindex; > > @@ -1338,7 +1332,6 @@ out: > } > > #if IS_ENABLED(CONFIG_IPV6) > - > static struct sk_buff *vxlan_na_create(struct sk_buff *request, > struct neighbour *n, bool isrouter) > { > @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) > return false; > } > > -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb, > - bool udp_csum) > -{ > - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; > - return iptunnel_handle_offloads(skb, udp_csum, type); > -} > - > #if IS_ENABLED(CONFIG_IPV6) > static int vxlan6_xmit_skb(struct vxlan_sock *vs, > struct dst_entry *dst, struct sk_buff *skb, > @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, > __be16 src_port, __be16 dst_port, __be32 vni, > bool xnet) > { > - struct ipv6hdr *ip6h; > struct vxlanhdr *vxh; > - struct udphdr *uh; > int min_headroom; > int err; > > - skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk)); > + skb = udp_tunnel_handle_offloads(skb, > + !udp_get_no_check6_tx( > + vs->uts.sock->sk)); > if (IS_ERR(skb)) > return -EINVAL; > > @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, > vxh->vx_flags = htonl(VXLAN_FLAGS); > vxh->vx_vni = vni; > > - __skb_push(skb, sizeof(*uh)); > - skb_reset_transport_header(skb); > - uh = udp_hdr(skb); > - > - uh->dest = dst_port; > - uh->source = src_port; > - > - uh->len = htons(skb->len); > - > - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); > - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | > - IPSKB_REROUTED); > - skb_dst_set(skb, dst); > - > - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb, > - saddr, daddr, skb->len); > - > - __skb_push(skb, sizeof(*ip6h)); > - skb_reset_network_header(skb); > - ip6h = ipv6_hdr(skb); > - ip6h->version = 6; > - 
ip6h->priority = prio; > - ip6h->flow_lbl[0] = 0; > - ip6h->flow_lbl[1] = 0; > - ip6h->flow_lbl[2] = 0; > - ip6h->payload_len = htons(skb->len); > - ip6h->nexthdr = IPPROTO_UDP; > - ip6h->hop_limit = ttl; > - ip6h->daddr = *daddr; > - ip6h->saddr = *saddr; > - > - ip6tunnel_xmit(skb, dev); > + udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio, > + ttl, src_port, dst_port); > return 0; > } > #endif > @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, > __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) > { > struct vxlanhdr *vxh; > - struct udphdr *uh; > int min_headroom; > int err; > > - skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx); > + skb = udp_tunnel_handle_offloads(skb, > + !vs->uts.sock->sk->sk_no_check_tx); > if (IS_ERR(skb)) > return -EINVAL; > > @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, > vxh->vx_flags = htonl(VXLAN_FLAGS); > vxh->vx_vni = vni; > > - __skb_push(skb, sizeof(*uh)); > - skb_reset_transport_header(skb); > - uh = udp_hdr(skb); > - > - uh->dest = dst_port; > - uh->source = src_port; > - > - uh->len = htons(skb->len); > - > - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb, > - src, dst, skb->len); > - > - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, > - tos, ttl, df, xnet); > + return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos, > + ttl, df, src_port, dst_port, xnet); > } > EXPORT_SYMBOL_GPL(vxlan_xmit_skb); > > @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, > tos = ip_tunnel_ecn_encap(tos, old_iph, skb); > ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); > > - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, > - fl4.saddr, dst->sin.sin_addr.s_addr, > - tos, ttl, df, src_port, dst_port, > - htonl(vni << 8), > - !net_eq(vxlan->net, dev_net(vxlan->dev))); > + err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb, > + fl4.saddr, dst->sin.sin_addr.s_addr, > + tos, ttl, df, src_port, dst_port, > + !net_eq(vxlan->net, > + dev_net(vxlan->dev))); > > if (err < 0) > goto rt_tx_error; > iptunnel_xmit_stats(err, &dev->stats, dev->tstats); > #if IS_ENABLED(CONFIG_IPV6) > } else { > - struct sock *sk = vxlan->vn_sock->sock->sk; > + struct sock *sk = vxlan->vn_sock->uts.sock->sk; > struct dst_entry *ndst; > struct flowi6 fl6; > u32 flags; > @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev) > spin_lock(&vn->sock_lock); > for (i = 0; i < PORT_HASH_SIZE; ++i) { > hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { > - port = inet_sk(vs->sock->sk)->inet_sport; > - sa_family = vs->sock->sk->sk_family; > + port = inet_sk(vs->uts.sock->sk)->inet_sport; > + sa_family = vs->uts.sock->sk->sk_family; > dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family, > port); > } > @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = { > static void vxlan_del_work(struct work_struct *work) > { > struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); > - > - sk_release_kernel(vs->sock->sk); > + udp_tunnel_sock_release(&vs->uts); > kfree_rcu(vs, rcu); > } > > -static struct socket *vxlan_create_sock(struct net *net, bool ipv6, > - __be16 port, u32 flags) > +/* Create new listen socket if needed */ > +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, > + vxlan_rcv_t rcv, void *data, > + u32 flags) > { > - struct socket *sock; > - struct udp_port_cfg udp_conf; > - int err; > + struct vxlan_net *vn = net_generic(net, vxlan_net_id); > + struct vxlan_sock *vs; > + struct udp_tunnel_socket_cfg vxlan_ts_cfg; > + bool ipv6 = !!(flags & 
VXLAN_F_IPV6); > + unsigned int h; > > - memset(&udp_conf, 0, sizeof(udp_conf)); > + memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg)); > > if (ipv6) { > - udp_conf.family = AF_INET6; > - udp_conf.use_udp6_tx_checksums = > + vxlan_ts_cfg.port.family = AF_INET6; > + vxlan_ts_cfg.port.use_udp6_tx_checksums = > !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); > - udp_conf.use_udp6_rx_checksums = > + vxlan_ts_cfg.port.use_udp6_rx_checksums = > !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); > } else { > - udp_conf.family = AF_INET; > - udp_conf.local_ip.s_addr = INADDR_ANY; > - udp_conf.use_udp_checksums = > + vxlan_ts_cfg.port.family = AF_INET; > + vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY; > + vxlan_ts_cfg.port.use_udp_checksums = > !!(flags & VXLAN_F_UDP_CSUM); > } > > - udp_conf.local_udp_port = port; > + vxlan_ts_cfg.port.local_udp_port = port; > + vxlan_ts_cfg.encap_type = 1; > + vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv; > + vxlan_ts_cfg.encap_destroy = NULL; > > - /* Open UDP socket */ > - err = udp_sock_create(net, &udp_conf, &sock); > - if (err < 0) > - return ERR_PTR(err); > - > - /* Disable multicast loopback */ > - inet_sk(sock->sk)->mc_loop = 0; > - > - return sock; > -} > - > -/* Create new listen socket if needed */ > -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, > - vxlan_rcv_t *rcv, void *data, > - u32 flags) > -{ > - struct vxlan_net *vn = net_generic(net, vxlan_net_id); > - struct vxlan_sock *vs; > - struct socket *sock; > - struct sock *sk; > - unsigned int h; > - bool ipv6 = !!(flags & VXLAN_F_IPV6); > - > - vs = kzalloc(sizeof(*vs), GFP_KERNEL); > + vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs), > + NULL, > + &vxlan_ts_cfg); > if (!vs) > return ERR_PTR(-ENOMEM); > > for (h = 0; h < VNI_HASH_SIZE; ++h) > INIT_HLIST_HEAD(&vs->vni_list[h]); > > - INIT_WORK(&vs->del_work, vxlan_del_work); > + spin_lock(&vn->sock_lock); > + list_add(&vs->next, &vn->vxlan_list); > + spin_unlock(&vn->sock_lock); > > 
- sock = vxlan_create_sock(net, ipv6, port, flags); > - if (IS_ERR(sock)) { > - kfree(vs); > - return ERR_CAST(sock); > - } > + INIT_WORK(&vs->del_work, vxlan_del_work); > > - vs->sock = sock; > - sk = sock->sk; > atomic_set(&vs->refcnt, 1); > + > vs->rcv = rcv; > - vs->data = data; > - rcu_assign_sk_user_data(vs->sock->sk, vs); > + vs->rcv_data = data; > > /* Initialize the vxlan udp offloads structure */ > vs->udp_offloads.port = port; > @@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, > vxlan_notify_add_rx_port(vs); > spin_unlock(&vn->sock_lock); > > - /* Mark socket as an encapsulation socket. */ > - udp_sk(sk)->encap_type = 1; > - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; > -#if IS_ENABLED(CONFIG_IPV6) > - if (ipv6) > - ipv6_stub->udpv6_encap_enable(); > - else > -#endif > - udp_encap_enable(); > - > return vs; > } > > struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, > - vxlan_rcv_t *rcv, void *data, > + vxlan_rcv_t rcv, void *data, > bool no_share, u32 flags) > { > - struct vxlan_net *vn = net_generic(net, vxlan_net_id); > struct vxlan_sock *vs; > > vs = vxlan_socket_create(net, port, rcv, data, flags); > @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, > if (no_share) /* Return error if sharing is not allowed. 
*/ > return vs; > > - spin_lock(&vn->sock_lock); > vs = vxlan_find_sock(net, port); > if (vs) { > if (vs->rcv == rcv) > @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, > else > vs = ERR_PTR(-EBUSY); > } > - spin_unlock(&vn->sock_lock); > > if (!vs) > vs = ERR_PTR(-EINVAL); > @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, > } > > list_add(&vxlan->next, &vn->vxlan_list); > - > return 0; > } > > diff --git a/include/net/vxlan.h b/include/net/vxlan.h > index d5f59f3..10bfc13 100644 > --- a/include/net/vxlan.h > +++ b/include/net/vxlan.h > @@ -4,23 +4,27 @@ > #include <linux/skbuff.h> > #include <linux/netdevice.h> > #include <linux/udp.h> > +#include <net/udp_tunnel.h> > > #define VNI_HASH_BITS 10 > #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) > > struct vxlan_sock; > -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); > > -/* per UDP socket information */ > +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, > + __be32 key); > + > +/* per vxlan socket information */ > struct vxlan_sock { > + struct udp_tunnel_sock uts; /* Must be the first member */ > struct hlist_node hlist; > - vxlan_rcv_t *rcv; > - void *data; > + struct list_head next; > struct work_struct del_work; > - struct socket *sock; > struct rcu_head rcu; > struct hlist_head vni_list[VNI_HASH_SIZE]; > atomic_t refcnt; > + vxlan_rcv_t rcv; > + void *rcv_data; > struct udp_offload udp_offloads; > }; > > @@ -35,7 +39,7 @@ struct vxlan_sock { > #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 > > struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, > - vxlan_rcv_t *rcv, void *data, > + vxlan_rcv_t rcv, void *data, > bool no_share, u32 flags); > > void vxlan_sock_release(struct vxlan_sock *vs); > diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c > index d8b7e24..7599efd 100644 > --- a/net/openvswitch/vport-vxlan.c > +++ b/net/openvswitch/vport-vxlan.c > @@ 
-59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) > static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) > { > struct ovs_key_ipv4_tunnel tun_key; > - struct vport *vport = vs->data; > + struct vport *vport = vs->rcv_data; > struct iphdr *iph; > __be64 key; > > @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) > static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) > { > struct vxlan_port *vxlan_port = vxlan_vport(vport); > - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; > + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; > > if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) > return -EMSGSIZE; > @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) > { > struct net *net = ovs_dp_get_net(vport->dp); > struct vxlan_port *vxlan_port = vxlan_vport(vport); > - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; > + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; > struct rtable *rt; > struct flowi4 fl; > __be16 src_port; > -- > 1.7.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>]
* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions. [not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com> @ 2014-08-27 5:19 ` Andy Zhou 0 siblings, 0 replies; 10+ messages in thread From: Andy Zhou @ 2014-08-27 5:19 UTC (permalink / raw) To: Tom Herbert; +Cc: David Miller, Linux Netdev List On Tue, Aug 26, 2014 at 9:14 PM, Tom Herbert <therbert@google.com> wrote: > > > On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote: >> Signed-off-by: Andy Zhou <azhou@nicira.com> >> --- >> drivers/net/vxlan.c | 199 >> ++++++++++++----------------------------- >> include/net/vxlan.h | 16 ++-- >> net/openvswitch/vport-vxlan.c | 6 +- >> 3 files changed, 68 insertions(+), 153 deletions(-) >> >> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c >> index beb377b..f1f1c48 100644 >> --- a/drivers/net/vxlan.c >> +++ b/drivers/net/vxlan.c >> @@ -42,6 +42,7 @@ >> #include <net/netns/generic.h> >> #include <net/vxlan.h> >> #include <net/protocol.h> >> +#include <net/udp_tunnel.h> >> #if IS_ENABLED(CONFIG_IPV6) >> #include <net/ipv6.h> >> #include <net/addrconf.h> >> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst >> *first_remote_rtnl(struct vxlan_fdb *fdb) >> /* Find VXLAN socket based on network namespace and UDP port */ >> static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) >> { >> - struct vxlan_sock *vs; >> - >> - hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { >> - if (inet_sk(vs->sock->sk)->inet_sport == port) >> - return vs; >> - } >> - return NULL; >> + return (struct vxlan_sock *)udp_tunnel_find_sock(net, port); > > It seems incorrect to assume that the socket returned is vxlan, presumably > this could be some other type of tunnel socket. > If I drop the udp_tunnel_find_sock() API from the last patch, then this should no longer be an issue, right? The vxlan driver will just keep track of its own open sockets. 
>> } >> >> static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) >> @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int >> nhoff) >> static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) >> { >> struct net_device *dev; >> - struct sock *sk = vs->sock->sk; >> + struct sock *sk = vs->uts.sock->sk; >> struct net *net = sock_net(sk); >> sa_family_t sa_family = sk->sk_family; >> __be16 port = inet_sk(sk)->inet_sport; >> @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock >> *vs) >> static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) >> { >> struct net_device *dev; >> - struct sock *sk = vs->sock->sk; >> + struct sock *sk = vs->uts.sock->sk; >> struct net *net = sock_net(sk); >> sa_family_t sa_family = sk->sk_family; >> __be16 port = inet_sk(sk)->inet_sport; >> @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs) >> >> void vxlan_sock_release(struct vxlan_sock *vs) >> { >> - struct sock *sk = vs->sock->sk; >> + struct sock *sk = vs->uts.sock->sk; >> struct net *net = sock_net(sk); >> struct vxlan_net *vn = net_generic(net, vxlan_net_id); >> >> @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs) >> >> spin_lock(&vn->sock_lock); >> hlist_del_rcu(&vs->hlist); >> - rcu_assign_sk_user_data(vs->sock->sk, NULL); >> vxlan_notify_del_rx_port(vs); >> spin_unlock(&vn->sock_lock); >> >> @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct >> *work) >> { >> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, >> igmp_join); >> struct vxlan_sock *vs = vxlan->vn_sock; >> - struct sock *sk = vs->sock->sk; >> + struct sock *sk = vs->uts.sock->sk; >> union vxlan_addr *ip = &vxlan->default_dst.remote_ip; >> int ifindex = vxlan->default_dst.remote_ifindex; >> >> @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct >> *work) >> { >> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, >> igmp_leave); >> struct vxlan_sock *vs 
= vxlan->vn_sock; >> - struct sock *sk = vs->sock->sk; >> + struct sock *sk = vs->uts.sock->sk; >> union vxlan_addr *ip = &vxlan->default_dst.remote_ip; >> int ifindex = vxlan->default_dst.remote_ifindex; >> >> @@ -1338,7 +1332,6 @@ out: >> } >> >> #if IS_ENABLED(CONFIG_IPV6) >> - >> static struct sk_buff *vxlan_na_create(struct sk_buff *request, >> struct neighbour *n, bool isrouter) >> { >> @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device >> *dev, struct sk_buff *skb) >> return false; >> } >> >> -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb, >> - bool udp_csum) >> -{ >> - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : >> SKB_GSO_UDP_TUNNEL; >> - return iptunnel_handle_offloads(skb, udp_csum, type); >> -} >> - >> #if IS_ENABLED(CONFIG_IPV6) >> static int vxlan6_xmit_skb(struct vxlan_sock *vs, >> struct dst_entry *dst, struct sk_buff *skb, >> @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, >> __be16 src_port, __be16 dst_port, __be32 vni, >> bool xnet) >> { >> - struct ipv6hdr *ip6h; >> struct vxlanhdr *vxh; >> - struct udphdr *uh; >> int min_headroom; >> int err; >> >> - skb = vxlan_handle_offloads(skb, >> !udp_get_no_check6_tx(vs->sock->sk)); >> + skb = udp_tunnel_handle_offloads(skb, >> + !udp_get_no_check6_tx( >> + vs->uts.sock->sk)); >> if (IS_ERR(skb)) >> return -EINVAL; >> >> @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, >> vxh->vx_flags = htonl(VXLAN_FLAGS); >> vxh->vx_vni = vni; >> >> - __skb_push(skb, sizeof(*uh)); >> - skb_reset_transport_header(skb); >> - uh = udp_hdr(skb); >> - >> - uh->dest = dst_port; >> - uh->source = src_port; >> - >> - uh->len = htons(skb->len); >> - >> - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); >> - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | >> IPSKB_XFRM_TRANSFORMED | >> - IPSKB_REROUTED); >> - skb_dst_set(skb, dst); >> - >> - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb, >> - saddr, daddr, skb->len); 
>> - >> - __skb_push(skb, sizeof(*ip6h)); >> - skb_reset_network_header(skb); >> - ip6h = ipv6_hdr(skb); >> - ip6h->version = 6; >> - ip6h->priority = prio; >> - ip6h->flow_lbl[0] = 0; >> - ip6h->flow_lbl[1] = 0; >> - ip6h->flow_lbl[2] = 0; >> - ip6h->payload_len = htons(skb->len); >> - ip6h->nexthdr = IPPROTO_UDP; >> - ip6h->hop_limit = ttl; >> - ip6h->daddr = *daddr; >> - ip6h->saddr = *saddr; >> - >> - ip6tunnel_xmit(skb, dev); >> + udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, >> prio, >> + ttl, src_port, dst_port); >> return 0; >> } >> #endif >> @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, >> __be16 src_port, __be16 dst_port, __be32 vni, bool >> xnet) >> { >> struct vxlanhdr *vxh; >> - struct udphdr *uh; >> int min_headroom; >> int err; >> >> - skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx); >> + skb = udp_tunnel_handle_offloads(skb, >> + >> !vs->uts.sock->sk->sk_no_check_tx); >> if (IS_ERR(skb)) >> return -EINVAL; >> >> @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, >> vxh->vx_flags = htonl(VXLAN_FLAGS); >> vxh->vx_vni = vni; >> >> - __skb_push(skb, sizeof(*uh)); >> - skb_reset_transport_header(skb); >> - uh = udp_hdr(skb); >> - >> - uh->dest = dst_port; >> - uh->source = src_port; >> - >> - uh->len = htons(skb->len); >> - >> - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb, >> - src, dst, skb->len); >> - >> - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, >> - tos, ttl, df, xnet); >> + return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos, >> + ttl, df, src_port, dst_port, xnet); >> } >> EXPORT_SYMBOL_GPL(vxlan_xmit_skb); >> >> @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, >> struct net_device *dev, >> tos = ip_tunnel_ecn_encap(tos, old_iph, skb); >> ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); >> >> - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, >> - fl4.saddr, dst->sin.sin_addr.s_addr, >> - tos, ttl, df, src_port, dst_port, >> - htonl(vni << 8), >> - !net_eq(vxlan->net, >> dev_net(vxlan->dev))); >> + err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, >> skb, >> + fl4.saddr, >> dst->sin.sin_addr.s_addr, >> + tos, ttl, df, src_port, >> dst_port, >> + !net_eq(vxlan->net, >> + dev_net(vxlan->dev))); >> >> if (err < 0) >> goto rt_tx_error; >> iptunnel_xmit_stats(err, &dev->stats, dev->tstats); >> #if IS_ENABLED(CONFIG_IPV6) >> } else { >> - struct sock *sk = vxlan->vn_sock->sock->sk; >> + struct sock *sk = vxlan->vn_sock->uts.sock->sk; >> struct dst_entry *ndst; >> struct flowi6 fl6; >> u32 flags; >> @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev) >> spin_lock(&vn->sock_lock); >> for (i = 0; i < PORT_HASH_SIZE; ++i) { >> hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { >> - port = inet_sk(vs->sock->sk)->inet_sport; >> - sa_family = vs->sock->sk->sk_family; >> + port = inet_sk(vs->uts.sock->sk)->inet_sport; >> + sa_family = vs->uts.sock->sk->sk_family; >> dev->netdev_ops->ndo_add_vxlan_port(dev, >> sa_family, >> port); >> } >> @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops >> = { >> static void vxlan_del_work(struct work_struct *work) >> { >> struct vxlan_sock *vs = container_of(work, struct vxlan_sock, >> del_work); >> - >> - sk_release_kernel(vs->sock->sk); >> + udp_tunnel_sock_release(&vs->uts); >> kfree_rcu(vs, rcu); >> } >> >> -static struct socket *vxlan_create_sock(struct net *net, bool ipv6, >> - __be16 port, u32 flags) >> +/* Create new listen socket if needed */ >> +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 >> port, >> + vxlan_rcv_t rcv, void *data, >> + u32 flags) >> { >> - struct socket *sock; >> - struct udp_port_cfg udp_conf; >> - int err; >> + struct vxlan_net *vn = net_generic(net, vxlan_net_id); >> + struct vxlan_sock *vs; 
>> + struct udp_tunnel_socket_cfg vxlan_ts_cfg; >> + bool ipv6 = !!(flags & VXLAN_F_IPV6); >> + unsigned int h; >> >> - memset(&udp_conf, 0, sizeof(udp_conf)); >> + memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg)); >> >> if (ipv6) { >> - udp_conf.family = AF_INET6; >> - udp_conf.use_udp6_tx_checksums = >> + vxlan_ts_cfg.port.family = AF_INET6; >> + vxlan_ts_cfg.port.use_udp6_tx_checksums = >> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); >> - udp_conf.use_udp6_rx_checksums = >> + vxlan_ts_cfg.port.use_udp6_rx_checksums = >> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); >> } else { >> - udp_conf.family = AF_INET; >> - udp_conf.local_ip.s_addr = INADDR_ANY; >> - udp_conf.use_udp_checksums = >> + vxlan_ts_cfg.port.family = AF_INET; >> + vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY; >> + vxlan_ts_cfg.port.use_udp_checksums = >> !!(flags & VXLAN_F_UDP_CSUM); >> } >> >> - udp_conf.local_udp_port = port; >> + vxlan_ts_cfg.port.local_udp_port = port; >> + vxlan_ts_cfg.encap_type = 1; >> + vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv; >> + vxlan_ts_cfg.encap_destroy = NULL; >> >> - /* Open UDP socket */ >> - err = udp_sock_create(net, &udp_conf, &sock); >> - if (err < 0) >> - return ERR_PTR(err); >> - >> - /* Disable multicast loopback */ >> - inet_sk(sock->sk)->mc_loop = 0; >> - >> - return sock; >> -} >> - >> -/* Create new listen socket if needed */ >> -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 >> port, >> - vxlan_rcv_t *rcv, void >> *data, >> - u32 flags) >> -{ >> - struct vxlan_net *vn = net_generic(net, vxlan_net_id); >> - struct vxlan_sock *vs; >> - struct socket *sock; >> - struct sock *sk; >> - unsigned int h; >> - bool ipv6 = !!(flags & VXLAN_F_IPV6); >> - >> - vs = kzalloc(sizeof(*vs), GFP_KERNEL); >> + vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs), >> + NULL, >> + &vxlan_ts_cfg); >> if (!vs) >> return ERR_PTR(-ENOMEM); >> >> for (h = 0; h < VNI_HASH_SIZE; ++h) >> INIT_HLIST_HEAD(&vs->vni_list[h]); >> >> - 
INIT_WORK(&vs->del_work, vxlan_del_work); >> + spin_lock(&vn->sock_lock); >> + list_add(&vs->next, &vn->vxlan_list); >> + spin_unlock(&vn->sock_lock); >> >> - sock = vxlan_create_sock(net, ipv6, port, flags); >> - if (IS_ERR(sock)) { >> - kfree(vs); >> - return ERR_CAST(sock); >> - } >> + INIT_WORK(&vs->del_work, vxlan_del_work); >> >> - vs->sock = sock; >> - sk = sock->sk; >> atomic_set(&vs->refcnt, 1); >> + >> vs->rcv = rcv; >> - vs->data = data; >> - rcu_assign_sk_user_data(vs->sock->sk, vs); >> + vs->rcv_data = data; >> >> /* Initialize the vxlan udp offloads structure */ >> vs->udp_offloads.port = port; >> @@ -2419,24 +2344,13 @@ static struct vxlan_sock >> *vxlan_socket_create(struct net *net, __be16 port, >> vxlan_notify_add_rx_port(vs); >> spin_unlock(&vn->sock_lock); >> >> - /* Mark socket as an encapsulation socket. */ >> - udp_sk(sk)->encap_type = 1; >> - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; >> -#if IS_ENABLED(CONFIG_IPV6) >> - if (ipv6) >> - ipv6_stub->udpv6_encap_enable(); >> - else >> -#endif >> - udp_encap_enable(); >> - >> return vs; >> } >> >> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, >> - vxlan_rcv_t *rcv, void *data, >> + vxlan_rcv_t rcv, void *data, >> bool no_share, u32 flags) >> { >> - struct vxlan_net *vn = net_generic(net, vxlan_net_id); >> struct vxlan_sock *vs; >> >> vs = vxlan_socket_create(net, port, rcv, data, flags); >> @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, >> __be16 port, >> if (no_share) /* Return error if sharing is not allowed. 
*/ >> return vs; >> >> - spin_lock(&vn->sock_lock); >> vs = vxlan_find_sock(net, port); >> if (vs) { >> if (vs->rcv == rcv) >> @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, >> __be16 port, >> else >> vs = ERR_PTR(-EBUSY); >> } >> - spin_unlock(&vn->sock_lock); >> >> if (!vs) >> vs = ERR_PTR(-EINVAL); >> @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct >> net_device *dev, >> } >> >> list_add(&vxlan->next, &vn->vxlan_list); >> - >> return 0; >> } >> >> diff --git a/include/net/vxlan.h b/include/net/vxlan.h >> index d5f59f3..10bfc13 100644 >> --- a/include/net/vxlan.h >> +++ b/include/net/vxlan.h >> @@ -4,23 +4,27 @@ >> #include <linux/skbuff.h> >> #include <linux/netdevice.h> >> #include <linux/udp.h> >> +#include <net/udp_tunnel.h> >> >> #define VNI_HASH_BITS 10 >> #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) >> >> struct vxlan_sock; >> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, >> __be32 key); >> >> -/* per UDP socket information */ >> +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, >> + __be32 key); >> + >> +/* per vxlan socket information */ >> struct vxlan_sock { >> + struct udp_tunnel_sock uts; /* Must be the first member */ >> struct hlist_node hlist; >> - vxlan_rcv_t *rcv; >> - void *data; >> + struct list_head next; >> struct work_struct del_work; >> - struct socket *sock; >> struct rcu_head rcu; >> struct hlist_head vni_list[VNI_HASH_SIZE]; >> atomic_t refcnt; >> + vxlan_rcv_t rcv; >> + void *rcv_data; >> struct udp_offload udp_offloads; >> }; >> >> @@ -35,7 +39,7 @@ struct vxlan_sock { >> #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 >> >> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, >> - vxlan_rcv_t *rcv, void *data, >> + vxlan_rcv_t rcv, void *data, >> bool no_share, u32 flags); >> >> void vxlan_sock_release(struct vxlan_sock *vs); >> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c >> index d8b7e24..7599efd 100644 >> 
--- a/net/openvswitch/vport-vxlan.c >> +++ b/net/openvswitch/vport-vxlan.c >> @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const >> struct vport *vport) >> static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 >> vx_vni) >> { >> struct ovs_key_ipv4_tunnel tun_key; >> - struct vport *vport = vs->data; >> + struct vport *vport = vs->rcv_data; >> struct iphdr *iph; >> __be64 key; >> >> @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct >> sk_buff *skb, __be32 vx_vni) >> static int vxlan_get_options(const struct vport *vport, struct sk_buff >> *skb) >> { >> struct vxlan_port *vxlan_port = vxlan_vport(vport); >> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; >> + __be16 dst_port = >> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; >> >> if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) >> return -EMSGSIZE; >> @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct >> sk_buff *skb) >> { >> struct net *net = ovs_dp_get_net(vport->dp); >> struct vxlan_port *vxlan_port = vxlan_vport(vport); >> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; >> + __be16 dst_port = >> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport; >> struct rtable *rt; >> struct flowi4 fl; >> __be16 src_port; >> -- >> 1.7.9.5 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe netdev" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > ^ permalink raw reply [flat|nested] 10+ messages in thread
* [net-next v3 3/3] l2tp: Refactor l2tp core driver to make use of the common UDP tunnel functions 2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou 2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou @ 2014-08-27 3:35 ` Andy Zhou 2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet 2014-08-27 4:12 ` Tom Herbert 3 siblings, 0 replies; 10+ messages in thread From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw) To: davem; +Cc: netdev, Andy Zhou Signed-off-by: Andy Zhou <azhou@nicira.com> --- net/l2tp/l2tp_core.c | 189 +++++++++++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 80 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1109d3b..b4b9474 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -113,6 +113,11 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; +struct l2tp_udp_tunnel { + struct udp_tunnel_sock uts; + struct l2tp_tunnel tunnel; +}; + static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) @@ -1334,14 +1339,47 @@ static void l2tp_tunnel_del_work(struct work_struct *work) if (sock) inet_shutdown(sock, 2); } else { + struct l2tp_udp_tunnel *udp_tunnel; + + udp_tunnel = container_of(tunnel, + struct l2tp_udp_tunnel, tunnel); + if (sock) - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + udp_tunnel_sock_release(&udp_tunnel->uts); } l2tp_tunnel_sock_put(sk); } +static void l2tp_tunnel_udp_port_conf(struct l2tp_tunnel_cfg *cfg, + struct udp_port_cfg *udp_conf) +{ + memset(udp_conf, 0, sizeof(*udp_conf)); + +#if IS_ENABLED(CONFIG_IPV6) + if (cfg->local_ip6 && cfg->peer_ip6) { + udp_conf->family = AF_INET6; + memcpy(&udp_conf->local_ip6, cfg->local_ip6, + sizeof(udp_conf->local_ip6)); + memcpy(&udp_conf->peer_ip6, cfg->peer_ip6, + sizeof(udp_conf->peer_ip6)); + 
udp_conf->use_udp6_tx_checksums = + cfg->udp6_zero_tx_checksums; + udp_conf->use_udp6_rx_checksums = + cfg->udp6_zero_rx_checksums; + } else +#endif + { + udp_conf->family = AF_INET; + udp_conf->local_ip = cfg->local_ip; + udp_conf->peer_ip = cfg->peer_ip; + udp_conf->use_udp_checksums = cfg->use_udp_checksums; + } + + udp_conf->local_udp_port = htons(cfg->local_udp_port); + udp_conf->peer_udp_port = htons(cfg->peer_udp_port); +} + /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. @@ -1363,31 +1401,7 @@ static int l2tp_tunnel_sock_create(struct net *net, switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: - memset(&udp_conf, 0, sizeof(udp_conf)); - -#if IS_ENABLED(CONFIG_IPV6) - if (cfg->local_ip6 && cfg->peer_ip6) { - udp_conf.family = AF_INET6; - memcpy(&udp_conf.local_ip6, cfg->local_ip6, - sizeof(udp_conf.local_ip6)); - memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, - sizeof(udp_conf.peer_ip6)); - udp_conf.use_udp6_tx_checksums = - cfg->udp6_zero_tx_checksums; - udp_conf.use_udp6_rx_checksums = - cfg->udp6_zero_rx_checksums; - } else -#endif - { - udp_conf.family = AF_INET; - udp_conf.local_ip = cfg->local_ip; - udp_conf.peer_ip = cfg->peer_ip; - udp_conf.use_udp_checksums = cfg->use_udp_checksums; - } - - udp_conf.local_udp_port = htons(cfg->local_udp_port); - udp_conf.peer_udp_port = htons(cfg->peer_udp_port); - + l2tp_tunnel_udp_port_conf(cfg, &udp_conf); err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) goto out; @@ -1471,6 +1485,31 @@ out: static struct lock_class_key l2tp_socket_class; +static int l2tp_sk_sanity_check(struct sock *sk, enum l2tp_encap_type encap, + u32 tunnel_id, int fd) +{ + unsigned int expected_protocol; + + switch (encap) { + case L2TP_ENCAPTYPE_UDP: + expected_protocol = IPPROTO_UDP; + break; + case L2TP_ENCAPTYPE_IP: + expected_protocol = IPPROTO_L2TP; + break; + default: + return -EPROTONOSUPPORT; + } + + if (sk->sk_protocol != 
expected_protocol) { + pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, expected_protocol); + return -EPROTONOSUPPORT; + } + + return 0; +} + int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; @@ -1478,7 +1517,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 struct socket *sock = NULL; struct sock *sk = NULL; struct l2tp_net *pn; - enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; + enum l2tp_encap_type encap = cfg ? cfg->encap : L2TP_ENCAPTYPE_UDP; /* Get the tunnel socket from the fd, which was opened by * the userspace L2TP daemon. If not specified, create a @@ -1486,9 +1525,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 */ if (fd < 0) { err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, - cfg, &sock); + cfg, &sock); if (err < 0) goto err; + + sk = sock->sk; } else { sock = sockfd_lookup(fd, &err); if (!sock) { @@ -1498,58 +1539,66 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 goto err; } + sk = sock->sk; + /* Reject namespace mismatches */ - if (!net_eq(sock_net(sock->sk), net)) { + if (!net_eq(sock_net(sk), net)) { pr_err("tunl %u: netns mismatch\n", tunnel_id); err = -EINVAL; goto err; } + + /* Quick sanity checks */ + err = l2tp_sk_sanity_check(sk, encap, tunnel_id, fd); + if (err) + goto err; + + /* Check if this socket has already been prepped */ + tunnel = l2tp_tunnel(sk); + if (tunnel != NULL) { + /* This socket has already been prepped */ + err = -EBUSY; + goto err; + } } - sk = sock->sk; + switch(encap) { + case L2TP_ENCAPTYPE_UDP: { + struct udp_tunnel_socket_cfg l2tp_udp_cfg; + struct l2tp_udp_tunnel *udp_tunnel; - if (cfg != NULL) - encap = cfg->encap; + l2tp_tunnel_udp_port_conf(cfg, &l2tp_udp_cfg.port); + l2tp_udp_cfg.encap_type = 
UDP_ENCAP_L2TPINUDP; + l2tp_udp_cfg.encap_rcv = l2tp_udp_encap_recv; + l2tp_udp_cfg.encap_destroy = l2tp_udp_encap_destroy; - /* Quick sanity checks */ - switch (encap) { - case L2TP_ENCAPTYPE_UDP: - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_UDP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); + udp_tunnel = (struct l2tp_udp_tunnel *) + create_udp_tunnel_sock(net, sizeof(*udp_tunnel), + sock, &l2tp_udp_cfg); + + if (!udp_tunnel) { + err = -ENOMEM; goto err; } + + tunnel = &udp_tunnel->tunnel; break; + } case L2TP_ENCAPTYPE_IP: - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_L2TP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); + tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL); + if (tunnel == NULL) { + err = -ENOMEM; goto err; } - break; - } - - /* Check if this socket has already been prepped */ - tunnel = l2tp_tunnel(sk); - if (tunnel != NULL) { - /* This socket has already been prepped */ - err = -EBUSY; - goto err; } - tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); - if (tunnel == NULL) { - err = -ENOMEM; - goto err; - } + rcu_assign_sk_user_data(sk, tunnel); + tunnel->encap = encap; tunnel->version = version; tunnel->tunnel_id = tunnel_id; tunnel->peer_tunnel_id = peer_tunnel_id; - tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS; - + tunnel->debug = cfg ? 
cfg->debug : L2TP_DEFAULT_DEBUG_FLAGS; tunnel->magic = L2TP_TUNNEL_MAGIC; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); rwlock_init(&tunnel->hlist_lock); @@ -1558,9 +1607,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->l2tp_net = net; pn = l2tp_pernet(net); - if (cfg != NULL) - tunnel->debug = cfg->debug; - #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1579,23 +1625,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } #endif - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - tunnel->encap = encap; - if (encap == L2TP_ENCAPTYPE_UDP) { - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; - udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; - udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - udpv6_encap_enable(); - else -#endif - udp_encap_enable(); - } - - sk->sk_user_data = tunnel; - /* Hook on the tunnel socket destructor so that we can cleanup * if the tunnel socket goes away. */ -- 1.7.9.5 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs 2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou 2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou 2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou @ 2014-08-27 3:54 ` Eric Dumazet 2014-08-27 5:01 ` Andy Zhou 2014-08-27 4:12 ` Tom Herbert 3 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2014-08-27 3:54 UTC (permalink / raw) To: Andy Zhou; +Cc: davem, netdev On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote: > Added create_udp_tunnel_socket(), packet receive and transmit, and > other related common functions for UDP tunnels. > > Per net open UDP tunnel ports are tracked in this common layer to > prevent sharing of a single port with more than one UDP tunnel. > > Signed-off-by: Andy Zhou <azhou@nicira.com> > --- I see some RCU code, but I do not see elementary rules of RCU being respected in this patch. Changelogs are a bit terse, and there is not a single comment in this code. What is going on exactly ? ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs 2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet @ 2014-08-27 5:01 ` Andy Zhou 2014-08-27 11:45 ` Eric Dumazet 0 siblings, 1 reply; 10+ messages in thread From: Andy Zhou @ 2014-08-27 5:01 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, netdev@vger.kernel.org On Tue, Aug 26, 2014 at 8:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote: >> Added create_udp_tunnel_socket(), packet receive and transmit, and >> other related common functions for UDP tunnels. >> >> Per net open UDP tunnel ports are tracked in this common layer to >> prevent sharing of a single port with more than one UDP tunnel. >> >> Signed-off-by: Andy Zhou <azhou@nicira.com> >> --- > > I see some RCU code, but I do not see elementary rules of RCU being > respected in this patch. Which RCU rule are you referring to? > > Changelogs are a bit terse, and there is not a single comment in this > code. I can add more content and comments in the next patch. > What is going on exactly ? > I am trying to refactor some UDP tunnel code so that we don't end up duplicating a lot of code when adding new UDP based tunnel protocols. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs 2014-08-27 5:01 ` Andy Zhou @ 2014-08-27 11:45 ` Eric Dumazet 0 siblings, 0 replies; 10+ messages in thread From: Eric Dumazet @ 2014-08-27 11:45 UTC (permalink / raw) To: Andy Zhou; +Cc: David Miller, netdev@vger.kernel.org On Tue, 2014-08-26 at 22:01 -0700, Andy Zhou wrote: > Which RCU rule are you referring to? The most elementary ones, like observing an RCU grace period before freeing? udp_tunnel_sock_release() is obviously wrong, or needs appropriate documentation. Please carefully read Documentation/RCU/checklist.txt and tell us why you believe your code is correct, either in the changelog or using comments. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs 2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou ` (2 preceding siblings ...) 2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet @ 2014-08-27 4:12 ` Tom Herbert 2014-08-27 5:17 ` Andy Zhou 3 siblings, 1 reply; 10+ messages in thread From: Tom Herbert @ 2014-08-27 4:12 UTC (permalink / raw) To: Andy Zhou; +Cc: David Miller, Linux Netdev List On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote: > Added create_udp_tunnel_socket(), packet receive and transmit, and > other related common functions for UDP tunnels. > > Per net open UDP tunnel ports are tracked in this common layer to > prevent sharing of a single port with more than one UDP tunnel. > This is not needed! If a UDP port is already bound (whether by another tunnel or not), then bind during tunnel initialization will fail. All this logic to store tunnel sockets in a separate list seems like unnecessary complexity. If a driver needs to track multiple ports it opens, it can do that on its own like VXLAN is already doing. 
> Signed-off-by: Andy Zhou <azhou@nicira.com> > --- > include/net/udp_tunnel.h | 50 +++++++++++ > net/ipv4/udp_tunnel.c | 216 +++++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 265 insertions(+), 1 deletion(-) > > diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h > index ffd69cb..5ff0746 100644 > --- a/include/net/udp_tunnel.h > +++ b/include/net/udp_tunnel.h > @@ -1,6 +1,8 @@ > #ifndef __NET_UDP_TUNNEL_H > #define __NET_UDP_TUNNEL_H > > +#include <net/ip_tunnels.h> > + > struct udp_port_cfg { > u8 family; > > @@ -26,7 +28,55 @@ struct udp_port_cfg { > use_udp6_rx_checksums:1; > }; > > +struct udp_tunnel_sock; > + > +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); > +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); > + > +struct udp_tunnel_socket_cfg { > + struct udp_port_cfg port; > + > + /* Used for setting up udp_sock fields, see udp.h for details */ > + __u8 encap_type; > + udp_tunnel_encap_rcv_t encap_rcv; > + udp_tunnel_encap_destroy_t encap_destroy; > +}; > + > +struct udp_tunnel_sock { > + struct hlist_node hlist; > + struct socket *sock; > +}; > + > int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, > struct socket **sockp); > > +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size, > + struct socket *sock, > + struct udp_tunnel_socket_cfg > + *socket_cfg); > + > +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port); > + > +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, > + struct sk_buff *skb, __be32 src, __be32 dst, > + __u8 tos, __u8 ttl, __be16 df, __be16 src_port, > + __be16 dst_port, bool xnet); > + > +#if IS_ENABLED(CONFIG_IPV6) > +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, > + struct sk_buff *skb, struct net_device *dev, > + struct in6_addr *saddr, struct in6_addr *daddr, > + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port); > + > +#endif > + > +void 
udp_tunnel_sock_release(struct udp_tunnel_sock *uts); > + > +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, > + bool udp_csum) > +{ > + int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; > + > + return iptunnel_handle_offloads(skb, udp_csum, type); > +} > #endif > diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c > index 61ec1a6..3ed66bc 100644 > --- a/net/ipv4/udp_tunnel.c > +++ b/net/ipv4/udp_tunnel.c > @@ -7,6 +7,23 @@ > #include <net/udp.h> > #include <net/udp_tunnel.h> > #include <net/net_namespace.h> > +#include <net/netns/generic.h> > +#if IS_ENABLED(CONFIG_IPV6) > +#include <net/ipv6.h> > +#include <net/addrconf.h> > +#include <net/ip6_tunnel.h> > +#include <net/ip6_checksum.h> > +#endif > + > +#define PORT_HASH_BITS 8 > +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS) > + > +static int udp_tunnel_net_id; > + > +struct udp_tunnel_net { > + struct hlist_head sock_list[PORT_HASH_SIZE]; > + spinlock_t sock_lock; /* Protecting the sock_list */ > +}; > > int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, > struct socket **sockp) > @@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, > return -EPFNOSUPPORT; > } > > - > *sockp = sock; > > return 0; > @@ -97,4 +113,202 @@ error: > } > EXPORT_SYMBOL(udp_sock_create); > > + > +/* Socket hash table head */ > +static inline struct hlist_head *uts_head(struct net *net, const __be16 port) > +{ > + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); > + > + return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; > +} > + > +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size, > + struct socket *sock, > + struct udp_tunnel_socket_cfg > + *cfg) > +{ > + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); > + struct udp_tunnel_sock *uts; > + struct sock *sk; > + const __be16 port = cfg->port.local_udp_port; > + const int ipv6 = (cfg->port.family == AF_INET6); > + int err; > + 
> + if (!sock) > + err = udp_sock_create(net, &cfg->port, &sock); > + else > + err = (sock->sk->sk_protocol == IPPROTO_UDP) ? > + 0 : -EPROTONOSUPPORT; > + > + if (err) > + return NULL; > + > + uts = kzalloc(size, GFP_KERNEL); > + if (!uts) > + return ERR_PTR(-ENOMEM); > + > + sk = sock->sk; > + > + /* Disable multicast loopback */ > + inet_sk(sk)->mc_loop = 0; > + > + rcu_assign_sk_user_data(sk, uts); > + > + udp_sk(sk)->encap_type = cfg->encap_type; > + udp_sk(sk)->encap_rcv = cfg->encap_rcv; > + udp_sk(sk)->encap_destroy = cfg->encap_destroy; > + > + uts->sock = sock; > + > + spin_lock(&utn->sock_lock); > + hlist_add_head_rcu(&uts->hlist, uts_head(net, port)); > + spin_unlock(&utn->sock_lock); > + > +#if IS_ENABLED(CONFIG_IPV6) > + if (ipv6) > + ipv6_stub->udpv6_encap_enable(); > + else > +#endif > + udp_encap_enable(); > + > + return uts; > +} > +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock); > + > +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, > + struct sk_buff *skb, __be32 src, __be32 dst, > + __u8 tos, __u8 ttl, __be16 df, __be16 src_port, > + __be16 dst_port, bool xnet) > +{ > + struct udphdr *uh; > + > + __skb_push(skb, sizeof(*uh)); > + skb_reset_transport_header(skb); > + uh = udp_hdr(skb); > + > + uh->dest = dst_port; > + uh->source = src_port; > + uh->len = htons(skb->len); > + > + udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); > + > + return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, > + tos, ttl, df, xnet); > +} > +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); > + > +#if IS_ENABLED(CONFIG_IPV6) > +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, > + struct sk_buff *skb, struct net_device *dev, > + struct in6_addr *saddr, struct in6_addr *daddr, > + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) > +{ > + struct udphdr *uh; > + struct ipv6hdr *ip6h; > + > + __skb_push(skb, sizeof(*uh)); > + skb_reset_transport_header(skb); > + uh = udp_hdr(skb); > + > + uh->dest = dst_port; > + 
uh->source = src_port; > + > + uh->len = htons(skb->len); > + uh->check = 0; > + > + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); > + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED > + | IPSKB_REROUTED); > + skb_dst_set(skb, dst); > + > + if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) { > + __wsum csum = skb_checksum(skb, 0, skb->len, 0); > + > + skb->ip_summed = CHECKSUM_UNNECESSARY; > + uh->check = csum_ipv6_magic(saddr, daddr, skb->len, > + IPPROTO_UDP, csum); > + if (uh->check == 0) > + uh->check = CSUM_MANGLED_0; > + } else { > + skb->ip_summed = CHECKSUM_PARTIAL; > + skb->csum_start = skb_transport_header(skb) - skb->head; > + skb->csum_offset = offsetof(struct udphdr, check); > + uh->check = ~csum_ipv6_magic(saddr, daddr, > + skb->len, IPPROTO_UDP, 0); > + } > + > + __skb_push(skb, sizeof(*ip6h)); > + skb_reset_network_header(skb); > + ip6h = ipv6_hdr(skb); > + ip6h->version = 6; > + ip6h->priority = prio; > + ip6h->flow_lbl[0] = 0; > + ip6h->flow_lbl[1] = 0; > + ip6h->flow_lbl[2] = 0; > + ip6h->payload_len = htons(skb->len); > + ip6h->nexthdr = IPPROTO_UDP; > + ip6h->hop_limit = ttl; > + ip6h->daddr = *daddr; > + ip6h->saddr = *saddr; > + > + ip6tunnel_xmit(skb, dev); > + return 0; > +} > +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); > +#endif > + > +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port) > +{ > + struct udp_tunnel_sock *uts; > + > + hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) { > + if (inet_sk(uts->sock->sk)->inet_sport == port) > + return uts; > + } > + > + return NULL; > +} > +EXPORT_SYMBOL_GPL(udp_tunnel_find_sock); > + > +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts) > +{ > + struct sock *sk = uts->sock->sk; > + struct net *net = sock_net(sk); > + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); > + > + spin_lock(&utn->sock_lock); > + hlist_del_rcu(&uts->hlist); > + rcu_assign_sk_user_data(uts->sock->sk, NULL); > + 
kernel_sock_shutdown(uts->sock, SHUT_RDWR); > + sk_release_kernel(sk); > + spin_unlock(&utn->sock_lock); > +} > +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); > + > +static int __net_init udp_tunnel_init_net(struct net *net) > +{ > + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); > + unsigned int h; > + > + spin_lock_init(&utn->sock_lock); > + > + for (h = 0; h < PORT_HASH_SIZE; h++) > + INIT_HLIST_HEAD(&utn->sock_list[h]); > + > + return 0; > +} > + > +static struct pernet_operations udp_tunnel_net_ops = { > + .init = udp_tunnel_init_net, > + .exit = NULL, > + .id = &udp_tunnel_net_id, > + .size = sizeof(struct udp_tunnel_net), > +}; > + > +static int __init udp_tunnel_init(void) > +{ > + return register_pernet_subsys(&udp_tunnel_net_ops); > +} > +late_initcall(udp_tunnel_init); > + > MODULE_LICENSE("GPL"); > -- > 1.7.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs 2014-08-27 4:12 ` Tom Herbert @ 2014-08-27 5:17 ` Andy Zhou 0 siblings, 0 replies; 10+ messages in thread From: Andy Zhou @ 2014-08-27 5:17 UTC (permalink / raw) To: Tom Herbert; +Cc: David Miller, Linux Netdev List On Tue, Aug 26, 2014 at 9:12 PM, Tom Herbert <therbert@google.com> wrote: > On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote: >> Added create_udp_tunnel_socket(), packet receive and transmit, and >> other related common functions for UDP tunnels. >> >> Per net open UDP tunnel ports are tracked in this common layer to >> prevent sharing of a single port with more than one UDP tunnel. >> > This is not needed! If a UDP port is already bound (whether by another > tunnel or not), then bind during tunnel initialization will fail. All > this logic to store tunnel sockets in a separate list seems like > unnecessary complexity. If a driver needs to track multiple ports it > opens, it can do that on its own like VXLAN is already doing. I was hoping to retain the udp_tunnel_find_sock() function which seems to be common. But it is not doing much at the moment, so I can just drop it in the next version. ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-08-27 11:45 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
2014-08-27 4:15 ` Tom Herbert
[not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
2014-08-27 5:19 ` Andy Zhou
2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
2014-08-27 5:01 ` Andy Zhou
2014-08-27 11:45 ` Eric Dumazet
2014-08-27 4:12 ` Tom Herbert
2014-08-27 5:17 ` Andy Zhou
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).