From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andy Zhou Subject: [net-next 02/10] udp: Expand UDP tunnel common APIs Date: Tue, 22 Jul 2014 03:19:45 -0700 Message-ID: <1406024393-6778-3-git-send-email-azhou@nicira.com> References: <1406024393-6778-1-git-send-email-azhou@nicira.com> Cc: netdev@vger.kernel.org, Andy Zhou To: davem@davemloft.net Return-path: Received: from na3sys009aog130.obsmtp.com ([74.125.149.143]:60267 "HELO na3sys009aog130.obsmtp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1754685AbaGVKdA (ORCPT ); Tue, 22 Jul 2014 06:33:00 -0400 Received: by mail-pd0-f182.google.com with SMTP id fp1so10864999pdb.41 for ; Tue, 22 Jul 2014 03:32:53 -0700 (PDT) In-Reply-To: <1406024393-6778-1-git-send-email-azhou@nicira.com> Sender: netdev-owner@vger.kernel.org List-ID: Added create_udp_tunnel_socket(), packet receive and transmit, and other related common functions for UDP tunnels. Per net open UDP tunnel ports are tracked in this common layer to prevent sharing of a single port with more than one UDP tunnel. 
Signed-off-by: Andy Zhou --- include/net/udp_tunnel.h | 57 +++++++++- net/ipv4/udp_tunnel.c | 257 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 312 insertions(+), 2 deletions(-) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 3f34c65..b5e815a 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -1,7 +1,10 @@ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H -#define UDP_TUNNEL_TYPE_VXLAN 0x01 +#include + +#define UDP_TUNNEL_TYPE_VXLAN 0x01 +#define UDP_TUNNEL_TYPE_GENEVE 0x02 struct udp_port_cfg { u8 family; @@ -28,7 +31,59 @@ struct udp_port_cfg { use_udp6_rx_checksums:1; }; +struct udp_tunnel_sock; + +typedef void (udp_tunnel_rcv_t)(struct udp_tunnel_sock *uts, + struct sk_buff *skb, ...); + +typedef int (udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); + +struct udp_tunnel_socket_cfg { + u8 tunnel_type; + struct udp_port_cfg port; + udp_tunnel_rcv_t *rcv; + udp_tunnel_encap_rcv_t *encap_rcv; + void *data; +}; + +struct udp_tunnel_sock { + u8 tunnel_type; + struct hlist_node hlist; + udp_tunnel_rcv_t *rcv; + void *data; + struct socket *sock; +}; + int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); +struct udp_tunnel_sock *create_udp_tunnel_socket(struct net *net, size_t size, + struct udp_tunnel_socket_cfg + *socket_cfg); + +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port); + +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, + struct sk_buff *skb, __be32 src, __be32 dst, + __u8 tos, __u8 ttl, __be16 df, __be16 src_port, + __be16 dst_port, bool xnet); + +#if IS_ENABLED(CONFIG_IPV6) +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, + struct sk_buff *skb, struct net_device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port); + +#endif + +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts); +void udp_tunnel_get_rx_port(struct 
net_device *dev); + +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, + bool udp_csum) +{ + int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + return iptunnel_handle_offloads(skb, udp_csum, type); +} #endif diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 61ec1a6..3c14b16 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -7,6 +7,23 @@ #include #include #include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#endif + +#define PORT_HASH_BITS 8 +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS) + +static int udp_tunnel_net_id; + +struct udp_tunnel_net { + struct hlist_head sock_list[PORT_HASH_SIZE]; + spinlock_t sock_lock; /* Protecting the sock_list */ +}; int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) @@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, return -EPFNOSUPPORT; } - *sockp = sock; return 0; @@ -97,4 +113,243 @@ error: } EXPORT_SYMBOL(udp_sock_create); + +/* Socket hash table head */ +static inline struct hlist_head *uts_head(struct net *net, const __be16 port) +{ + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); + + return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; +} + +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err = skb_unclone(skb, GFP_ATOMIC); + + if (unlikely(err)) + return err; + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } else { + if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + } + + return 0; +} + +struct udp_tunnel_sock *create_udp_tunnel_socket(struct net *net, size_t size, + struct udp_tunnel_socket_cfg + *cfg) +{ + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); + struct udp_tunnel_sock *uts; + struct socket *sock; + struct sock *sk; + const __be16 port = cfg->port.local_udp_port; + const int ipv6 = (cfg->port.family == AF_INET6); + int err; + + 
uts = kzalloc(size, GFP_KERNEL); + if (!uts) + return ERR_PTR(-ENOMEM); + + err = udp_sock_create(net, &cfg->port, &sock); + if (err < 0) { + kfree(uts); + return ERR_PTR(err); + } + + /* Disable multicast loopback */ + inet_sk(sock->sk)->mc_loop = 0; + + uts->sock = sock; + sk = sock->sk; + uts->rcv = cfg->rcv; + uts->data = cfg->data; + rcu_assign_sk_user_data(sock->sk, uts); + + spin_lock(&utn->sock_lock); + hlist_add_head_rcu(&uts->hlist, uts_head(net, port)); + spin_unlock(&utn->sock_lock); + + udp_sk(sk)->encap_type = 1; + udp_sk(sk)->encap_rcv = cfg->encap_rcv; + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6) + ipv6_stub->udpv6_encap_enable(); + else +#endif + udp_encap_enable(); + + return uts; +} +EXPORT_SYMBOL_GPL(create_udp_tunnel_socket); + +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, + struct sk_buff *skb, __be32 src, __be32 dst, + __u8 tos, __u8 ttl, __be16 df, __be16 src_port, + __be16 dst_port, bool xnet) +{ + struct udphdr *uh; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + uh->len = htons(skb->len); + + udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + + return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + tos, ttl, df, xnet); +} +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); + +#if IS_ENABLED(CONFIG_IPV6) +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, + struct sk_buff *skb, struct net_device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +{ + struct udphdr *uh; + struct ipv6hdr *ip6h; + int err; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + + uh->len = htons(skb->len); + uh->check = 0; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + 
skb_dst_set(skb, dst); + + if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) { + __wsum csum = skb_checksum(skb, 0, skb->len, 0); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + uh->check = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_ipv6_magic(saddr, daddr, + skb->len, IPPROTO_UDP, 0); + } + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->priority = prio; + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + err = handle_offloads(skb); + if (err) + return err; + + ip6tunnel_xmit(skb, dev); + return 0; +} +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); +#endif + +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port) +{ + struct udp_tunnel_sock *uts; + + hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) { + if (inet_sk(uts->sock->sk)->inet_sport == port) + return uts; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(udp_tunnel_find_sock); + +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts) +{ + struct sock *sk = uts->sock->sk; + struct net *net = sock_net(sk); + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); + + spin_lock(&utn->sock_lock); + hlist_del_rcu(&uts->hlist); + rcu_assign_sk_user_data(uts->sock->sk, NULL); + spin_unlock(&utn->sock_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); + +/* Calls the ndo_add_udp_tunnel_port of the caller in order to + * supply the listening VXLAN udp ports. Callers are expected + * to implement the ndo_add_udp_tunnel_port. 
+ */ +void udp_tunnel_get_rx_port(struct net_device *dev) +{ + struct udp_tunnel_sock *uts; + struct net *net = dev_net(dev); + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); + sa_family_t sa_family; + __be16 port; + unsigned int i; + + spin_lock(&utn->sock_lock); + for (i = 0; i < PORT_HASH_SIZE; ++i) { + hlist_for_each_entry_rcu(uts, &utn->sock_list[i], hlist) { + port = inet_sk(uts->sock->sk)->inet_sport; + sa_family = uts->sock->sk->sk_family; + dev->netdev_ops->ndo_add_udp_tunnel_port(dev, + sa_family, port, uts->tunnel_type); + } + } + spin_unlock(&utn->sock_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_get_rx_port); + +static int __net_init udp_tunnel_init_net(struct net *net) +{ + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id); + unsigned int h; + + spin_lock_init(&utn->sock_lock); + + for (h = 0; h < PORT_HASH_SIZE; h++) + INIT_HLIST_HEAD(&utn->sock_list[h]); + + return 0; +} + +static struct pernet_operations udp_tunnel_net_ops = { + .init = udp_tunnel_init_net, + .exit = NULL, + .id = &udp_tunnel_net_id, + .size = sizeof(struct udp_tunnel_net), +}; + +static int __init udp_tunnel_init(void) +{ + return register_pernet_subsys(&udp_tunnel_net_ops); +} +late_initcall(udp_tunnel_init); + MODULE_LICENSE("GPL"); -- 1.7.9.5