* [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
@ 2014-06-18 23:07 Christopher White
2014-06-19 0:08 ` Tom Herbert
0 siblings, 1 reply; 7+ messages in thread
From: Christopher White @ 2014-06-18 23:07 UTC (permalink / raw)
To: Linux Netdev List
Cc: Vina Ermagan (vermagan),
Lorand Jakab -X (lojakab - M SQUARED CONSULTING INC. at Cisco)
This is a static tunnel implementation of LISP as described in RFC 6830:
http://tools.ietf.org/html/rfc6830
This driver provides point-to-point LISP dataplane
encapsulation/decapsulation for statically configured endpoints. It provides
support for IPv4 in IPv4 and IPv6 in IPv4. IPv6 outer headers are not
supported yet. Instance ID is supported on a per device basis.
This implementation has been tested against LISPMob.
Changes from v2: Move some functions to common headers. Remove unnecessary skb
ownership change. Minor cleanup. Address comments from Eric Dumazet (eric.dumazet@gmail.com)
and Tom Herbert (therbert@google.com).
Signed-off-by: Chris White <chris@logicalelegance.com>
---
drivers/net/Kconfig | 12 +
drivers/net/Makefile | 1 +
drivers/net/lisp.c | 857 ++++++++++++++++++++++++++++++++++++++++++
drivers/net/vxlan.c | 22 +-
include/net/route.h | 20 +
include/net/udp.h | 24 +-
include/uapi/linux/if_link.h | 17 +
net/ipv4/udp.c | 75 ----
net/ipv4/udp_offload.c | 118 ++++++
9 files changed, 1050 insertions(+), 96 deletions(-)
create mode 100644 drivers/net/lisp.c
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 89402c3..5d49b1e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -158,6 +158,18 @@ config VXLAN
To compile this driver as a module, choose M here: the module
will be called vxlan.
+config LISP
+ tristate "Locator Identifier Separation Protocol (LISP)"
+ depends on INET
+ select NET_IP_TUNNEL
+ ---help---
+ Create a LISP virtual interface that provides static LISP tunnel
+ encapsulation. For more information see:
+ http://tools.ietf.org/html/rfc6830
+
+ To compile this driver as a module, choose M here: the module will be
+ called lisp.
+
config NETCONSOLE
tristate "Network console logging support"
---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3fef8a8..943590d 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
obj-$(CONFIG_VXLAN) += vxlan.o
obj-$(CONFIG_NLMON) += nlmon.o
+obj-$(CONFIG_LISP) += lisp.o
#
# Networking Drivers
diff --git a/drivers/net/lisp.c b/drivers/net/lisp.c
new file mode 100644
index 0000000..310c960
--- /dev/null
+++ b/drivers/net/lisp.c
@@ -0,0 +1,857 @@
+/*
+ * lisp.c
+ * This file is part of LISP Implementation.
+ * It provides a netdevice for static tunneling between LISP
+ * devices. IPv4 encapsulation is currently supported.
+ *
+ * Copyright (C) 2014 Cisco Systems, Inc, 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * Written or modified by:
+ * Chris White <chris@logicalelegance.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/rculist.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/hash.h>
+#include <linux/ethtool.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+#include <net/route.h>
+#include <net/xfrm.h>
+#include <linux/in_route.h>
+#include <linux/version.h>
+
+#define LISP_VERSION "0.1"
+
+static inline void vlan_set_tci(struct sk_buff *skb, u16 vlan_tci)
+{
+ skb->vlan_tci = vlan_tci; /* plain store of the TCI; NOTE(review): no caller is visible in this patch */
+}
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+/**
+ * struct lisphdr - LISP header
+ * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
+ * @locator_status_bits_present: Flag indicating the presence of Locator Status
+ * Bits (LSB).
+ * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
+ * @map_version_present: Flag indicating the use of mapping versioning.
+ * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
+ * @reserved_flags: 3 bits reserved for future flags.
+ * @nonce: 24 bit nonce value.
+ * @map_version: 24 bit mapping version.
+ * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
+ * is not set, 8 bits when it is.
+ * @instance_id: 24 bit Instance ID
+ *
+ * The eight flag bits share the first byte; @nonce and @map_version overlay
+ * each other in union u1, and the Instance ID / LSB pair overlays the 32 bit
+ * LSB word in union u2. The header follows the UDP header directly (see
+ * LISP_HLEN).
+ */
+struct lisphdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ __u8 reserved_flags : 3; /* same eight flags as the #else branch, declared in reverse order */
+ __u8 instance_id_present : 1;
+ __u8 map_version_present : 1;
+ __u8 solicit_echo_nonce : 1;
+ __u8 locator_status_bits_present : 1;
+ __u8 nonce_present : 1;
+#else
+ __u8 nonce_present : 1;
+ __u8 locator_status_bits_present : 1;
+ __u8 solicit_echo_nonce : 1;
+ __u8 map_version_present : 1;
+ __u8 instance_id_present : 1;
+ __u8 reserved_flags : 3;
+#endif
+ union {
+ __u8 nonce[3];
+ __u8 map_version[3];
+ } u1; /* bytes 1-3: nonce or map version */
+ union {
+ __be32 locator_status_bits;
+ struct {
+ __u8 instance_id[3];
+ __u8 locator_status_bits;
+ } word2;
+ } u2; /* bytes 4-7: full LSB word, or Instance ID + 8 bit LSB */
+};
+
+#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
+
+/* UDP port for LISP traffic.
+ * The IANA assigned port is 4341.
+ * Read-only module parameter "udp_port" (mode 0444); lisp_setup() converts
+ * it to network byte order and uses it as the default listen port of every
+ * new device.
+ */
+static unsigned short lisp_port __read_mostly = 4341;
+module_param_named(udp_port, lisp_port, ushort, 0444);
+MODULE_PARM_DESC(udp_port, "Destination UDP port");
+static int lisp_net_id; /* per-netns slot id, filled in through lisp_net_ops.id */
+
+/* per-network namespace private data for this module */
+struct lisp_net {
+ struct list_head lisp_list; /* every lisp_dev in this namespace, linked through ->next */
+ struct hlist_head sock_list[PORT_HASH_SIZE]; /* lisp_sock entries hashed by UDP port, see s_head() */
+ spinlock_t sock_lock; /* held around updates of sock_list and of the per-socket iid_list */
+};
+
+union lisp_addr { /* one locator address, viewed through whichever sockaddr flavour applies */
+struct sockaddr_in sin; /* IPv4 view; the only one the transmit path reads */
+ struct sockaddr_in6 sin6; /* IPv6 view; not referenced elsewhere in this file */
+ struct sockaddr sa; /* generic view, used to store the address family */
+};
+
+#define IID_HASH_BITS 10
+#define IID_HASH_SIZE (1 << IID_HASH_BITS)
+
+struct lisp_sock;
+typedef void (lisp_rcv_t)(struct lisp_sock *ls, struct sk_buff *skb);
+
+/* per UDP socket information */
+struct lisp_sock {
+ struct hlist_node hlist; /* link in lisp_net.sock_list */
+ lisp_rcv_t *rcv; /* receive handler supplied at creation; compared in lisp_sock_add() */
+ void *data; /* opaque pointer stored at creation; not read in this file */
+ struct work_struct del_work; /* runs lisp_del_work() once the last reference is dropped */
+ struct socket *sock; /* kernel UDP socket created by udpv4_create_encap_sock() */
+ struct rcu_head rcu; /* used by kfree_rcu() in lisp_del_work() */
+ struct hlist_head iid_list[IID_HASH_SIZE]; /* lisp_dev entries hashed by Instance ID */
+ atomic_t refcnt; /* starts at 1; incremented for each additional user of the socket */
+};
+
+/* LISP pseudo network device */
+struct lisp_dev {
+ struct hlist_node hlist; /* link in the owning socket's iid_list */
+ struct list_head next; /* link in lisp_net.lisp_list */
+ struct net_device *dev; /* back pointer, set in lisp_setup() */
+ u32 iid; /* Instance ID */
+ struct lisp_sock *ls_socket; /* Input socket */
+ __be16 rcv_port; /* Listen port to receive packets */
+ __be16 encap_port; /* Dest port for encaped packets */
+ __u8 tos; /* passed to the route lookup and to iptunnel_xmit() */
+ __u8 ttl; /* passed to iptunnel_xmit() for the outer header */
+ u32 flags; /* LISP_F_* bits */
+ union lisp_addr remote; /* Tunnel dst (RLOC) */
+ union lisp_addr local; /* Tunnel src (our RLOC) */
+ struct work_struct sock_work; /* runs lisp_sock_work() to create/attach the socket */
+};
+
+#define LISP_F_UDP_CSUM 0x1
+
+static struct workqueue_struct *lisp_wq;
+
+/* Pick the bucket of a socket's device table that an Instance ID hashes to */
+static inline struct hlist_head *iid_head(struct lisp_sock *s, u32 iid)
+{
+	u32 bucket = hash_32(iid, IID_HASH_BITS);
+
+	return s->iid_list + bucket;
+}
+
+/* Pick the bucket of the per-namespace socket table that a UDP port
+ * (network byte order) hashes to.
+ */
+static inline struct hlist_head *s_head(struct net *net, __be16 port)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	u32 bucket = hash_32(ntohs(port), PORT_HASH_BITS);
+
+	return ln->sock_list + bucket;
+}
+
+/* Look up the LISP socket bound to @port in @net.
+ * Returns the matching lisp_sock, or NULL when no socket uses that port.
+ */
+static struct lisp_sock *lisp_find_sock(struct net *net, __be16 port)
+{
+	struct lisp_sock *found = NULL;
+	struct lisp_sock *cur;
+
+	hlist_for_each_entry_rcu(cur, s_head(net, port), hlist) {
+		if (inet_sk(cur->sock->sk)->inet_sport != port)
+			continue;
+		found = cur;
+		break;
+	}
+	return found;
+}
+
+/* Look up the device attached to socket @s that owns Instance ID @iid.
+ * Returns NULL when no device on this socket uses that ID.
+ */
+static struct lisp_dev *lisp_find_iid(struct lisp_sock *s, u32 iid)
+{
+	struct lisp_dev *match = NULL;
+	struct lisp_dev *cur;
+
+	hlist_for_each_entry_rcu(cur, iid_head(s, iid), hlist) {
+		if (cur->iid != iid)
+			continue;
+		match = cur;
+		break;
+	}
+	return match;
+}
+
+/* Attach @dev to socket @s: record the socket on the device and hash the
+ * device by its Instance ID so the receive path can find it.
+ */
+static void lisp_sock_add_dev(struct lisp_sock *s, struct lisp_dev *dev)
+{
+	struct hlist_head *bucket = iid_head(s, dev->iid);
+
+	dev->ls_socket = s;
+	hlist_add_head_rcu(&dev->hlist, bucket);
+}
+
+/* ndo_init: allocate the per-CPU counters and bind the device to the UDP
+ * socket of its listen port. An existing socket is shared; otherwise the
+ * creation is deferred to lisp_sock_work() on the LISP workqueue.
+ * Returns 0, or -ENOMEM when the counters cannot be allocated.
+ */
+static int lisp_init(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct lisp_sock *s;
+	int cpu;
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_sw_netstats *cpu_stats;
+
+		cpu_stats = per_cpu_ptr(dev->tstats, cpu);
+		u64_stats_init(&cpu_stats->syncp);
+	}
+
+	spin_lock(&ln->sock_lock);
+	s = lisp_find_sock(net, lispdev->rcv_port);
+	if (!s) {
+		/* Nobody listens on this port yet; the reference taken here
+		 * is dropped at the end of lisp_sock_work().
+		 */
+		dev_hold(dev);
+		queue_work(lisp_wq, &lispdev->sock_work);
+	} else {
+		/* Same port: share the socket that is already there */
+		atomic_inc(&s->refcnt);
+		lisp_sock_add_dev(s, lispdev);
+	}
+	spin_unlock(&ln->sock_lock);
+	return 0;
+}
+
+/* Drop one reference on @s. The last reference unhashes the socket, clears
+ * its sk_user_data and hands the actual teardown to lisp_del_work().
+ */
+void lisp_sock_release(struct lisp_sock *s)
+{
+	struct lisp_net *ln = net_generic(sock_net(s->sock->sk), lisp_net_id);
+
+	if (atomic_dec_and_test(&s->refcnt)) {
+		spin_lock(&ln->sock_lock);
+		hlist_del_rcu(&s->hlist);
+		rcu_assign_sk_user_data(s->sock->sk, NULL);
+		spin_unlock(&ln->sock_lock);
+		queue_work(lisp_wq, &s->del_work);
+	}
+}
+EXPORT_SYMBOL_GPL(lisp_sock_release);
+
+/* ndo_uninit: give back the socket reference (if a socket was ever attached)
+ * and free the per-CPU counters allocated in lisp_init().
+ */
+static void lisp_uninit(struct net_device *dev)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct lisp_sock *attached = lispdev->ls_socket;
+
+	if (attached != NULL)
+		lisp_sock_release(attached);
+
+	free_percpu(dev->tstats);
+}
+
+/* ndo_change_mtu: delegated unchanged to eth_change_mtu() */
+static int lisp_change_mtu(struct net_device *dev, int new_mtu)
+{
+	int rc = eth_change_mtu(dev, new_mtu);
+
+	return rc;
+}
+
+/* Select the UDP tunnel GSO type matching @udp_csum and let
+ * iptunnel_handle_offloads() prepare @skb for encapsulation.
+ * Returns whatever iptunnel_handle_offloads() returns.
+ */
+static inline struct sk_buff *lisp_handle_offloads(struct sk_buff *skb,
+						   bool udp_csum)
+{
+	int type;
+
+	if (udp_csum)
+		type = SKB_GSO_UDP_TUNNEL_CSUM;
+	else
+		type = SKB_GSO_UDP_TUNNEL;
+
+	return iptunnel_handle_offloads(skb, udp_csum, type);
+}
+
+/* Fill in the UDP and LISP headers of an outgoing packet.
+ *
+ * @dev:   LISP device the packet leaves through; its socket must be attached.
+ * @skb:   packet whose transport header already points at the room reserved
+ *         for the UDP header (LISP_HLEN bytes pushed by the caller).
+ * @saddr: outer IPv4 source address, used for the UDP checksum.
+ * @daddr: outer IPv4 destination address, used for the UDP checksum.
+ *
+ * The LISP header always carries the device's Instance ID and an 8 bit
+ * Locator Status Bits field of 1; nonce, echo-nonce and map-version are
+ * not used.
+ */
+static void lisp_build_header(const struct lisp_dev *dev,
+			      struct sk_buff *skb, __be32 saddr, __be32 daddr)
+{
+	struct udphdr *udph = udp_hdr(skb);
+	struct lisphdr *lisph = (struct lisphdr *)(udph + 1);
+	struct net *net = dev_net(dev->dev);
+	int high, low;
+
+	udph->dest = dev->encap_port;
+
+	/* Source port is derived from the flow hash within the local range */
+	inet_get_local_port_range(net, &low, &high);
+	udph->source = udp_tunnel_get_src_port(low, high, skb);
+	udph->len = htons(skb->len - skb_transport_offset(skb));
+
+	/* Flags: only Locator Status Bits and Instance ID are present */
+	lisph->nonce_present = 0;
+	lisph->locator_status_bits_present = 1;
+	lisph->solicit_echo_nonce = 0;
+	lisph->map_version_present = 0;
+	lisph->instance_id_present = 1;
+	lisph->reserved_flags = 0;
+
+	/* Neither nonce nor map version is sent: clear the shared field */
+	lisph->u1.nonce[0] = 0;
+	lisph->u1.nonce[1] = 0;
+	lisph->u1.nonce[2] = 0;
+
+	/* 24 bit Instance ID, most significant byte first */
+	lisph->u2.word2.instance_id[0] = dev->iid >> 16;
+	lisph->u2.word2.instance_id[1] = dev->iid >> 8;
+	lisph->u2.word2.instance_id[2] = dev->iid;
+	lisph->u2.word2.locator_status_bits = 1;
+
+	/* udp_set_csum() takes the socket's "no checksum" flag as its first
+	 * argument, not the socket itself.
+	 */
+	udp_set_csum(dev->ls_socket->sock->sk->sk_no_check_tx, skb,
+		     saddr, daddr, skb->len);
+}
+
+/* Transmit locally sourced packets with LISP encapsulation.
+ *
+ * Only IPv4 and IPv6 payloads are encapsulated; anything else, and any
+ * packet sent before the UDP socket has been attached by lisp_sock_work(),
+ * is dropped. The skb is consumed on every path and NETDEV_TX_OK is always
+ * returned; failures are only visible in the device statistics.
+ */
+static netdev_tx_t lisp_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct net *net = dev_net(lispdev->dev);
+	struct lisp_sock *s = lispdev->ls_socket;
+	struct rtable *rt;
+	int min_headroom;
+	__be32 saddr;
+	__be32 daddr;
+	__be16 df = 0;
+	__u8 ttl;
+	int sent_len;
+	int err;
+
+	if (skb->protocol != htons(ETH_P_IP) &&
+	    skb->protocol != htons(ETH_P_IPV6))
+		goto drop;
+
+	/* The socket is bound asynchronously from the workqueue, so it may
+	 * not be there yet (or its creation may have failed).
+	 */
+	if (unlikely(!s))
+		goto drop;
+
+	/* Route lookup */
+	saddr = lispdev->local.sin.sin_addr.s_addr;
+	daddr = lispdev->remote.sin.sin_addr.s_addr;
+	rt = ip_route_output_mark(net, &saddr, daddr, IPPROTO_UDP,
+				  lispdev->tos, skb->mark);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto tx_error;
+	}
+
+	/* A UDP checksum is wanted exactly when the socket does NOT have
+	 * sk_no_check_tx set.
+	 */
+	skb = lisp_handle_offloads(skb, !s->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb)) {
+		/* iptunnel_handle_offloads() freed the skb on failure */
+		err = PTR_ERR(skb);
+		skb = NULL;
+		goto err_free_rt;
+	}
+
+	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+			+ sizeof(struct iphdr) + LISP_HLEN;
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
+
+	skb_reset_inner_headers(skb);
+
+	__skb_push(skb, LISP_HLEN);
+	skb_reset_transport_header(skb);
+
+	lisp_build_header(lispdev, skb, saddr, daddr);
+
+	/* Outer packet may be fragmented: DF is left clear */
+	skb->ignore_df = 1;
+
+	/* An unconfigured TTL (0) falls back to the route's hop limit */
+	ttl = lispdev->ttl ? lispdev->ttl : ip4_dst_hoplimit(&rt->dst);
+
+	sent_len = iptunnel_xmit(s->sock->sk, rt, skb,
+				 saddr, daddr,
+				 IPPROTO_UDP, lispdev->tos,
+				 ttl, df, false);
+
+	iptunnel_xmit_stats(sent_len, &dev->stats, dev->tstats);
+	return NETDEV_TX_OK;
+
+err_free_rt:
+	ip_rt_put(rt);
+tx_error:
+	/* skb is NULL here when it has already been freed */
+	kfree_skb(skb);
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+	return NETDEV_TX_OK;
+drop:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+/* Deliver one decapsulated packet to the LISP device that owns its
+ * Instance ID.
+ *
+ * @s:   socket the packet arrived on.
+ * @skb: packet with skb->data at the inner IP header (the caller has pulled
+ *       the UDP and LISP headers) and the transport header still pointing
+ *       at the outer UDP header.
+ *
+ * The skb is always consumed: either handed to netif_rx() or freed. Packets
+ * whose inner version is neither 4 nor 6, or whose Instance ID matches no
+ * device on this socket, are dropped. A missing Instance ID field is
+ * treated as Instance ID 0.
+ */
+static void lisp_rcv(struct lisp_sock *s,
+		     struct sk_buff *skb)
+{
+	struct lisp_dev *lispdev;
+	struct iphdr *inner_iph;
+	struct lisphdr *lisph;
+	struct pcpu_sw_netstats *stats;
+	__be16 protocol;
+	__u32 iid = 0;
+
+	/* The inner header is inspected below; make sure it is in the linear
+	 * area before taking pointers into the packet.
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto drop;
+
+	lisph = (struct lisphdr *)(udp_hdr(skb) + 1);
+	inner_iph = (struct iphdr *)(lisph + 1);
+	switch (inner_iph->version) {
+	case 4:
+		protocol = htons(ETH_P_IP);
+		break;
+	case 6:
+		protocol = htons(ETH_P_IPV6);
+		break;
+	default:
+		goto drop;
+	}
+
+	/* 24 bit Instance ID, most significant byte first */
+	if (lisph->instance_id_present)
+		iid = (lisph->u2.word2.instance_id[0] << 16) |
+		      (lisph->u2.word2.instance_id[1] << 8) |
+		      lisph->u2.word2.instance_id[2];
+
+	/* Find the IID in our configuration */
+	lispdev = lisp_find_iid(s, iid);
+	if (!lispdev) {
+		/* No device to attribute the message to, and the sender
+		 * controls how often this happens: rate limit it.
+		 */
+		net_info_ratelimited("lisp: Instance ID 0x%x not found\n",
+				     iid);
+		goto drop;
+	}
+
+	skb->protocol = protocol;
+	skb->dev = lispdev->dev;
+	skb_reset_network_header(skb);
+
+	stats = this_cpu_ptr(lispdev->dev->tstats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	netif_rx(skb);
+	return;
+drop:
+	kfree_skb(skb);
+}
+
+
+/* Callback from net/ipv4/udp.c to receive packets.
+ *
+ * Strips the UDP and LISP headers and passes the packet to lisp_rcv() with
+ * the lisp_sock stored in the socket's sk_user_data.
+ *
+ * Returns 0 when the skb has been consumed (delivered or freed here), and 1
+ * when the headers could not even be pulled; in that case the skb is left
+ * untouched for the caller.
+ */
+static int lisp_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct lisp_sock *s;
+
+	if (!pskb_may_pull(skb, LISP_HLEN))
+		goto error;
+
+	if (iptunnel_pull_header(skb, LISP_HLEN, 0))
+		goto drop;
+
+	/* Cleared by lisp_sock_release() once the socket is going away */
+	s = rcu_dereference_sk_user_data(sk);
+	if (!s)
+		goto drop;
+
+	/* If the NIC driver gave us an encapsulated packet
+	 * with the encapsulation mark, the device checksummed it
+	 * for us. Otherwise force the upper layers to verify it.
+	 */
+	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	     skb->ip_summed != CHECKSUM_PARTIAL) ||
+	    !skb->encapsulation)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->encapsulation = 0;
+	lisp_rcv(s, skb);
+	return 0;
+drop:
+	kfree_skb(skb);
+	return 0;
+error:
+	return 1;
+}
+
+static const struct net_device_ops lisp_netdev_ops = { /* installed on every LISP device by lisp_setup() */
+ .ndo_init = lisp_init, /* allocates counters, attaches or schedules the UDP socket */
+ .ndo_uninit = lisp_uninit, /* releases the socket reference and the counters */
+ .ndo_start_xmit = lisp_xmit, /* encapsulates and sends */
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_change_mtu = lisp_change_mtu
+};
+
+/* Info for udev: device type attached to each LISP netdev through
+ * SET_NETDEV_DEVTYPE() in lisp_setup().
+ */
+static struct device_type lisp_type = {
+ .name = "lisp",
+};
+
+/* Work item queued by lisp_sock_release(): release the kernel socket and
+ * free the lisp_sock through kfree_rcu().
+ */
+static void lisp_del_work(struct work_struct *work)
+{
+	struct lisp_sock *victim;
+
+	victim = container_of(work, struct lisp_sock, del_work);
+	sk_release_kernel(victim->sock->sk);
+	kfree_rcu(victim, rcu);
+}
+
+/* Create a new listen socket on @port in @net.
+ *
+ * @rcv and @data are stored on the new lisp_sock; the LISP_F_UDP_CSUM bit
+ * of @flags is forwarded to udpv4_create_encap_sock(). The socket starts
+ * with a reference count of 1, is hashed into the per-namespace table and
+ * gets lisp_udp_encap_rcv() as its encapsulation receive hook.
+ *
+ * Returns the new lisp_sock, or an ERR_PTR() on allocation or socket
+ * creation failure.
+ */
+static struct lisp_sock *lisp_socket_create(struct net *net, __be16 port,
+					    lisp_rcv_t *rcv, void *data,
+					    u32 flags)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_sock *ls;
+	struct socket *sock;
+	struct sock *sk;
+	unsigned int i;
+	int err;
+
+	ls = kmalloc(sizeof(*ls), GFP_KERNEL);
+	if (!ls)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < IID_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&ls->iid_list[i]);
+
+	INIT_WORK(&ls->del_work, lisp_del_work);
+
+	err = udpv4_create_encap_sock(net, port, &sock,
+				      (flags & LISP_F_UDP_CSUM));
+	if (err < 0) {
+		kfree(ls);
+		return ERR_PTR(err);
+	}
+
+	sk = sock->sk;
+	ls->sock = sock;
+	ls->rcv = rcv;
+	ls->data = data;
+	atomic_set(&ls->refcnt, 1);
+	rcu_assign_sk_user_data(sk, ls);
+
+	spin_lock(&ln->sock_lock);
+	hlist_add_head_rcu(&ls->hlist, s_head(net, port));
+	spin_unlock(&ln->sock_lock);
+
+	/* Route incoming datagrams on this socket to the LISP receive hook */
+	udp_sk(sk)->encap_type = 1;
+	udp_sk(sk)->encap_rcv = lisp_udp_encap_rcv;
+	udp_encap_enable();
+
+	return ls;
+}
+
+/* Get a socket for @port: create one, or - when creation fails - take a
+ * reference on an existing socket for that port.
+ *
+ * Returns the socket, ERR_PTR(-EBUSY) when the existing socket uses a
+ * different receive handler than @rcv, or ERR_PTR(-EINVAL) when creation
+ * failed and no socket exists for the port.
+ */
+struct lisp_sock *lisp_sock_add(struct net *net, __be16 port, lisp_rcv_t *rcv,
+				void *data, u32 flags)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_sock *s;
+
+	s = lisp_socket_create(net, port, rcv, data, flags);
+	if (IS_ERR(s)) {
+		spin_lock(&ln->sock_lock);
+		s = lisp_find_sock(net, port);
+		if (!s)
+			s = ERR_PTR(-EINVAL);
+		else if (s->rcv != rcv)
+			s = ERR_PTR(-EBUSY);
+		else
+			atomic_inc(&s->refcnt);
+		spin_unlock(&ln->sock_lock);
+	}
+	return s;
+}
+
+/* Work item scheduled by lisp_init() when no socket exists for the device's
+ * listen port: obtain a socket, attach the device to it on success, and
+ * drop the device reference taken when the work was queued.
+ */
+static void lisp_sock_work(struct work_struct *work)
+{
+	struct lisp_dev *lispdev = container_of(work, struct lisp_dev,
+						sock_work);
+	struct net_device *netdev = lispdev->dev;
+	struct net *net = dev_net(netdev);
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_sock *s;
+
+	s = lisp_sock_add(net, lispdev->rcv_port, lisp_rcv, NULL,
+			  lispdev->flags);
+
+	spin_lock(&ln->sock_lock);
+	if (!IS_ERR(s))
+		lisp_sock_add_dev(s, lispdev);
+	spin_unlock(&ln->sock_lock);
+
+	dev_put(netdev);
+}
+
+/* Init the device structure.
+ *
+ * rtnl_link_ops.setup callback: fills in the netdev defaults for a LISP
+ * device and the private defaults (back pointer, work item, UDP ports).
+ * Both the listen port and the destination port default to the "udp_port"
+ * module parameter; netlink attributes handled in lisp_newlink() may
+ * override them afterwards.
+ */
+static void lisp_setup(struct net_device *dev)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+
+	dev->type = ARPHRD_NONE;
+	dev->flags = IFF_NOARP;
+	dev->addr_len = 4;
+	/* NOTE(review): lisp_xmit() prepends sizeof(struct iphdr) + LISP_HLEN
+	 * bytes, which is more than the sizeof(struct lisphdr) + 4 accounted
+	 * for in the two values below - confirm the intended headroom and
+	 * default MTU.
+	 */
+	dev->needed_headroom = LL_MAX_HEADER + sizeof(struct lisphdr) + 4;
+	dev->mtu = ETH_DATA_LEN - sizeof(struct lisphdr) - 4;
+
+	dev->netdev_ops = &lisp_netdev_ops;
+	dev->destructor = free_netdev;
+	SET_NETDEV_DEVTYPE(dev, &lisp_type);
+
+	dev->tx_queue_len = 0;
+	dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_NETNS_LOCAL |
+			  NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE);
+	dev->hw_features |= (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+			     NETIF_F_GSO_SOFTWARE);
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+	INIT_LIST_HEAD(&lispdev->next);
+	INIT_WORK(&lispdev->sock_work, lisp_sock_work);
+
+	lispdev->rcv_port = htons(lisp_port);
+	/* Without a default the encapsulated packets would be sent to UDP
+	 * port 0 whenever IFLA_LISP_ENCAP_PORT is not supplied.
+	 */
+	lispdev->encap_port = htons(lisp_port);
+	lispdev->dev = dev;
+}
+
+/* Netlink validation policy for the IFLA_LISP_* attributes. Every attribute
+ * that lisp_newlink() reads must have an entry here so that its payload
+ * size is checked before it is accessed.
+ */
+static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
+	[IFLA_LISP_IID]		= { .type = NLA_U32 },
+	[IFLA_LISP_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, daddr)},
+	[IFLA_LISP_LOCAL6]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_LISP_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr)},
+	[IFLA_LISP_REMOTE6]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_LISP_ENCAP_PORT]	= { .type = NLA_U16 },
+	[IFLA_LISP_LISTEN_PORT]	= { .type = NLA_U16 },
+	[IFLA_LISP_TOS]		= { .type = NLA_U8 },
+	[IFLA_LISP_TTL]		= { .type = NLA_U8 },
+	[IFLA_LISP_UDP_CSUM]	= { .type = NLA_U8 },
+};
+
+/* rtnl_link_ops.newlink: configure a new LISP device from the IFLA_LISP_*
+ * attributes in @data and register it.
+ *
+ * Ports are taken from the attributes in host byte order and stored in
+ * network byte order; addresses are stored as received. Attributes that
+ * are absent leave the defaults from lisp_setup() in place.
+ *
+ * Returns 0 on success, -EINVAL when no attribute block was supplied,
+ * -ERANGE when the Instance ID does not fit in 24 bits, or the error from
+ * register_netdevice().
+ */
+static int lisp_newlink(struct net *net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	int err;
+
+	/* data is NULL when the request carries no link-specific attributes */
+	if (!data)
+		return -EINVAL;
+
+	if (data[IFLA_LISP_IID]) {
+		u32 iid = nla_get_u32(data[IFLA_LISP_IID]);
+
+		/* Only 24 bits fit into the LISP header */
+		if (iid > 0xffffff)
+			return -ERANGE;
+		lispdev->iid = iid;
+	}
+
+	if (data[IFLA_LISP_LOCAL]) {
+		lispdev->local.sin.sin_addr.s_addr =
+			nla_get_be32(data[IFLA_LISP_LOCAL]);
+		lispdev->local.sa.sa_family = AF_INET;
+	}
+
+	if (data[IFLA_LISP_ENCAP_PORT])
+		lispdev->encap_port =
+			htons(nla_get_u16(data[IFLA_LISP_ENCAP_PORT]));
+
+	if (data[IFLA_LISP_LISTEN_PORT])
+		lispdev->rcv_port =
+			htons(nla_get_u16(data[IFLA_LISP_LISTEN_PORT]));
+
+	if (data[IFLA_LISP_REMOTE]) {
+		lispdev->remote.sin.sin_addr.s_addr =
+			nla_get_be32(data[IFLA_LISP_REMOTE]);
+		lispdev->remote.sa.sa_family = AF_INET;
+	}
+
+	if (data[IFLA_LISP_TOS])
+		lispdev->tos = nla_get_u8(data[IFLA_LISP_TOS]);
+
+	if (data[IFLA_LISP_TTL])
+		lispdev->ttl = nla_get_u8(data[IFLA_LISP_TTL]);
+
+	if (data[IFLA_LISP_UDP_CSUM] && nla_get_u8(data[IFLA_LISP_UDP_CSUM]))
+		lispdev->flags |= LISP_F_UDP_CSUM;
+
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
+	list_add(&lispdev->next, &ln->lisp_list);
+	return 0;
+}
+
+/* rtnl_link_ops.dellink: unhash the device from its socket (if it was ever
+ * attached), take it off the per-namespace list and queue it on @head for
+ * unregistration.
+ */
+static void lisp_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct lisp_net *ln = net_generic(dev_net(dev), lisp_net_id);
+	struct hlist_node *node = &lispdev->hlist;
+
+	spin_lock(&ln->sock_lock);
+	if (!hlist_unhashed(node))
+		hlist_del_rcu(node);
+	spin_unlock(&ln->sock_lock);
+
+	list_del(&lispdev->next);
+	unregister_netdevice_queue(dev, head);
+}
+
+/* rtnl_link_ops.get_size: room reserved for the IFLA_LISP_* attributes of
+ * one device; the IPv6 address attributes are included in the total.
+ */
+static size_t lisp_get_size(const struct net_device *dev)
+{
+	size_t size = 0;
+
+	size += nla_total_size(4);			 /* IFLA_LISP_IID */
+	size += nla_total_size(4);			 /* IFLA_LISP_LOCAL */
+	size += nla_total_size(sizeof(struct in6_addr)); /* IFLA_LISP_LOCAL6 */
+	size += nla_total_size(4);			 /* IFLA_LISP_REMOTE */
+	size += nla_total_size(sizeof(struct in6_addr)); /* IFLA_LISP_REMOTE6 */
+	size += nla_total_size(2);			 /* IFLA_LISP_ENCAP_PORT */
+	size += nla_total_size(2);			 /* IFLA_LISP_LISTEN_PORT */
+	size += nla_total_size(1);			 /* IFLA_LISP_TOS */
+	size += nla_total_size(1);			 /* IFLA_LISP_TTL */
+	size += nla_total_size(1);			 /* IFLA_LISP_UDP_CSUM */
+
+	return size;
+}
+
+/* Fill attributes into skb.
+ *
+ * rtnl_link_ops.fill_info: dumps the device configuration using the same
+ * representation lisp_newlink() accepts - Instance ID and ports in host
+ * byte order, IPv4 addresses in network byte order - so that a dumped
+ * value can be fed back unchanged. IPv6 attributes are not emitted yet.
+ *
+ * Returns 0, or -EMSGSIZE when the message has no room left.
+ */
+static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	const struct lisp_dev *lispdev = netdev_priv(dev);
+
+	if (nla_put_u32(skb, IFLA_LISP_IID, lispdev->iid) ||
+	    nla_put_be32(skb, IFLA_LISP_LOCAL,
+			 lispdev->local.sin.sin_addr.s_addr) ||
+	    nla_put_be32(skb, IFLA_LISP_REMOTE,
+			 lispdev->remote.sin.sin_addr.s_addr) ||
+	    nla_put_u16(skb, IFLA_LISP_ENCAP_PORT,
+			ntohs(lispdev->encap_port)) ||
+	    nla_put_u16(skb, IFLA_LISP_LISTEN_PORT,
+			ntohs(lispdev->rcv_port)) ||
+	    nla_put_u8(skb, IFLA_LISP_TOS, lispdev->tos) ||
+	    nla_put_u8(skb, IFLA_LISP_TTL, lispdev->ttl) ||
+	    nla_put_u8(skb, IFLA_LISP_UDP_CSUM,
+		       !!(lispdev->flags & LISP_F_UDP_CSUM)))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ return 0; /* accepts everything: neither tb nor data is inspected, not even for NULL */
+}
+
+static struct rtnl_link_ops lisp_link_ops __read_mostly = { /* registered in lisp_netdev_init() */
+ .kind = "lisp",
+ .maxtype = IFLA_LISP_MAX,
+ .policy = lisp_policy,
+ .priv_size = sizeof(struct lisp_dev), /* netdev_priv() of each device is a struct lisp_dev */
+ .setup = lisp_setup,
+ .validate = lisp_validate,
+ .newlink = lisp_newlink,
+ .dellink = lisp_dellink,
+ .get_size = lisp_get_size,
+ .fill_info = lisp_fill_info,
+};
+
+/* Per-namespace exit: unregister every LISP device still on the namespace's
+ * list, batched under a single rtnl_lock().
+ */
+static __net_exit void lisp_exit_net(struct net *net)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_dev *cur;
+	LIST_HEAD(kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(cur, &ln->lisp_list, next) {
+		unregister_netdevice_queue(cur->dev, &kill_list);
+	}
+	unregister_netdevice_many(&kill_list);
+	rtnl_unlock();
+}
+
+/* Per-namespace init: start with an empty device list, an unlocked socket
+ * lock and empty socket hash buckets. Always returns 0.
+ */
+static __net_init int lisp_init_net(struct net *net)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	unsigned int bucket = 0;
+
+	INIT_LIST_HEAD(&ln->lisp_list);
+	spin_lock_init(&ln->sock_lock);
+
+	while (bucket < PORT_HASH_SIZE) {
+		INIT_HLIST_HEAD(&ln->sock_list[bucket]);
+		bucket++;
+	}
+
+	return 0;
+}
+
+static struct pernet_operations lisp_net_ops = { /* registered in lisp_netdev_init() */
+ .init = lisp_init_net,
+ .exit = lisp_exit_net,
+ .id = &lisp_net_id, /* slot id later passed to net_generic() */
+ .size = sizeof(struct lisp_net), /* per-namespace private area is a struct lisp_net */
+};
+
+/* Module init: create the LISP workqueue, then register the per-namespace
+ * operations and the "lisp" link type. Earlier steps are undone when a
+ * later one fails. Returns 0 or a negative errno.
+ */
+static int __init lisp_netdev_init(void)
+{
+	int rc;
+
+	lisp_wq = alloc_workqueue("lisp", 0, 0);
+	if (!lisp_wq)
+		return -ENOMEM;
+
+	rc = register_pernet_device(&lisp_net_ops);
+	if (rc)
+		goto err_destroy_wq;
+
+	rc = rtnl_link_register(&lisp_link_ops);
+	if (!rc)
+		return 0;
+
+	unregister_pernet_device(&lisp_net_ops);
+err_destroy_wq:
+	destroy_workqueue(lisp_wq);
+	return rc;
+}
+
+static void __exit lisp_netdev_cleanup(void)
+{
+ rtnl_link_unregister(&lisp_link_ops); /* undoes rtnl_link_register() from lisp_netdev_init() */
+ destroy_workqueue(lisp_wq); /* NOTE(review): nothing may queue work on lisp_wq after this point - confirm the ordering against lisp_exit_net() */
+ unregister_pernet_device(&lisp_net_ops);
+ rcu_barrier(); /* presumably waits for the kfree_rcu() issued by lisp_del_work() - confirm */
+}
+
+late_initcall(lisp_netdev_init);
+module_exit(lisp_netdev_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(LISP_VERSION);
+MODULE_AUTHOR("Chris White <chris@logicalelegance.com>");
+MODULE_ALIAS_RTNL_LINK("lisp");
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ade33ef..c04cce8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1570,25 +1570,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-/* Compute source port for outgoing packet
- * first choice to use L4 flow hash since it will spread
- * better and maybe available from hardware
- * secondary choice is to use jhash on the Ethernet header
- */
-__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
-{
- unsigned int range = (port_max - port_min) + 1;
- u32 hash;
-
- hash = skb_get_hash(skb);
- if (!hash)
- hash = jhash(skb->data, 2 * ETH_ALEN,
- (__force u32) skb->protocol);
-
- return htons((((u64) hash * range) >> 32) + port_min);
-}
-EXPORT_SYMBOL_GPL(vxlan_src_port);
-
static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
bool udp_csum)
{
@@ -1807,7 +1788,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
- src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);
+ src_port = udp_tunnel_get_src_port(vxlan->port_min, vxlan->port_max,
+ skb);
if (dst->sa.sa_family == AF_INET) {
memset(&fl4, 0, sizeof(fl4));
diff --git a/include/net/route.h b/include/net/route.h
index b17cf28..ff55ac5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -131,6 +131,26 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
return ip_route_output_key(net, &fl4);
}
+/* Output route lookup for tunnels that also matches on the skb mark.
+ * @saddr is in/out: the value passed in seeds the lookup and the source
+ * address found in the flow key afterwards is written back through it.
+ * Returns the result of ip_route_output_key().
+ */
+static inline struct rtable *ip_route_output_mark(struct net *net,
+						  __be32 *saddr, __be32 daddr,
+						  u8 ipproto, u8 tos, u32 skb_mark)
+{
+	/* Tunnel configuration keeps the DSCP part of the TOS bits, but the
+	 * routing code expects the RT_TOS bits only.
+	 */
+	struct flowi4 fl4 = {
+		.flowi4_proto = ipproto,
+		.flowi4_mark = skb_mark,
+		.flowi4_tos = RT_TOS(tos),
+		.saddr = *saddr,
+		.daddr = daddr,
+	};
+	struct rtable *rt = ip_route_output_key(net, &fl4);
+
+	*saddr = fl4.saddr;
+	return rt;
+}
+
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
struct sock *sk,
__be32 daddr, __be32 saddr,
diff --git a/include/net/udp.h b/include/net/udp.h
index 68a1fef..0b079c5 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,6 +167,27 @@ static inline void udp_lib_hash(struct sock *sk)
void udp_lib_unhash(struct sock *sk);
void udp_lib_rehash(struct sock *sk, u16 new_hash);
+/* Compute the source UDP port for outgoing packets on UDP tunnels.
+ *
+ * First choice is the skb flow hash, since it spreads better and may be
+ * available from hardware. When no hash is available, fall back to a jhash
+ * over the first 2 * ETH_ALEN bytes at skb->data (the MAC addresses for an
+ * Ethernet payload), seeded with skb->protocol.
+ *
+ * The hash is scaled into [port_min, port_max] (host byte order) and the
+ * result is returned in network byte order.
+ */
+static inline __be16 udp_tunnel_get_src_port(__u16 port_min, __u16 port_max,
+					     struct sk_buff *skb)
+{
+	unsigned int range = (port_max - port_min) + 1;
+	u32 hash;
+
+	hash = skb_get_hash(skb);
+	if (!hash)
+		hash = jhash(skb->data, 2 * ETH_ALEN,
+			     (__force u32) skb->protocol);
+
+	return htons((((u64) hash * range) >> 32) + port_min);
+}
+
static inline void udp_lib_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
@@ -270,7 +291,8 @@ void udp4_proc_exit(void);
#endif
int udpv4_offload_init(void);
-
+int udpv4_create_encap_sock(struct net *net, __be16 port, struct socket **psock,
+ bool csum);
void udp_init(void);
void udp_encap_enable(void);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b385348..0077832 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -331,6 +331,23 @@ struct ifla_vxlan_port_range {
__be16 high;
};
+/* LISP section */
+enum {
+ IFLA_LISP_UNSPEC,
+ IFLA_LISP_IID, /* u32 Instance ID */
+ IFLA_LISP_LOCAL, /* 4 byte IPv4 tunnel source */
+ IFLA_LISP_REMOTE, /* 4 byte IPv4 tunnel destination */
+ IFLA_LISP_LOCAL6, /* struct in6_addr; has a policy entry but is not read by the driver in this patch */
+ IFLA_LISP_REMOTE6, /* struct in6_addr; has a policy entry but is not read by the driver in this patch */
+ IFLA_LISP_ENCAP_PORT, /* u16 destination UDP port of encapsulated packets */
+ IFLA_LISP_LISTEN_PORT, /* u16 UDP port the tunnel listens on */
+ IFLA_LISP_TOS, /* u8 */
+ IFLA_LISP_TTL, /* u8 */
+ IFLA_LISP_UDP_CSUM, /* u8, non-zero sets LISP_F_UDP_CSUM on the device */
+ __IFLA_LISP_MAX
+};
+
/* Bonding section */
enum {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d92f94b..c69b198 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2524,78 +2524,3 @@ void __init udp_init(void)
sysctl_udp_wmem_min = SK_MEM_QUANTUM;
}
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- u16 mac_offset = skb->mac_header;
- int mac_len = skb->mac_len;
- int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
- __be16 protocol = skb->protocol;
- netdev_features_t enc_features;
- int udp_offset, outer_hlen;
- unsigned int oldlen;
- bool need_csum;
-
- oldlen = (u16)~skb->len;
-
- if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
- goto out;
-
- skb->encapsulation = 0;
- __skb_pull(skb, tnl_hlen);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb_inner_network_offset(skb));
- skb->mac_len = skb_inner_network_offset(skb);
- skb->protocol = htons(ETH_P_TEB);
-
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
-
- /* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
- segs = skb_mac_gso_segment(skb, enc_features);
- if (!segs || IS_ERR(segs)) {
- skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
- mac_len);
- goto out;
- }
-
- outer_hlen = skb_tnl_header_len(skb);
- udp_offset = outer_hlen - tnl_hlen;
- skb = segs;
- do {
- struct udphdr *uh;
- int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
-
- skb->mac_len = mac_len;
-
- skb_push(skb, outer_hlen);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, mac_len);
- skb_set_transport_header(skb, udp_offset);
- len = skb->len - udp_offset;
- uh = udp_hdr(skb);
- uh->len = htons(len);
-
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
-
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
- uh->check = gso_make_checksum(skb, ~uh->check);
-
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- }
-
- skb->protocol = protocol;
- } while ((skb = skb->next));
-out:
- return segs;
-}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 546d2d4..cb77404 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -248,3 +248,121 @@ int __init udpv4_offload_init(void)
{
return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}
+
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ u16 mac_offset = skb->mac_header;
+ int mac_len = skb->mac_len;
+ int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ __be16 protocol = skb->protocol;
+ netdev_features_t enc_features;
+ int udp_offset, outer_hlen;
+ unsigned int oldlen;
+ bool need_csum;
+
+ oldlen = (u16)~skb->len;
+
+ if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+ goto out;
+
+ skb->encapsulation = 0;
+ __skb_pull(skb, tnl_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, skb_inner_network_offset(skb));
+ skb->mac_len = skb_inner_network_offset(skb);
+ skb->protocol = htons(ETH_P_TEB);
+
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ if (need_csum)
+ skb->encap_hdr_csum = 1;
+
+ /* segment inner packet. */
+ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ segs = skb_mac_gso_segment(skb, enc_features);
+ if (!segs || IS_ERR(segs)) {
+ skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
+ mac_len);
+ goto out;
+ }
+
+ outer_hlen = skb_tnl_header_len(skb);
+ udp_offset = outer_hlen - tnl_hlen;
+ skb = segs;
+ do {
+ struct udphdr *uh;
+ int len;
+
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+
+ skb->mac_len = mac_len;
+
+ skb_push(skb, outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, mac_len);
+ skb_set_transport_header(skb, udp_offset);
+ len = skb->len - udp_offset;
+ uh = udp_hdr(skb);
+ uh->len = htons(len);
+
+ if (need_csum) {
+ __be32 delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ uh->check = gso_make_checksum(skb, ~uh->check);
+
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ }
+
+ skb->protocol = protocol;
+ } while ((skb = skb->next));
+out:
+ return segs;
+}
+
+int udpv4_create_encap_sock(struct net *net, __be16 port, struct socket **psock,
+ bool csum)
+{
+ struct sock *sk;
+ struct socket *sock;
+ struct sockaddr_in lisp_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = port,
+ };
+ int rc;
+
+ /* Create UDP socket for encapsulation receive. */
+ rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (rc < 0) {
+ pr_debug("UDP socket create failed\n");
+ return rc;
+ }
+
+ /* Put in proper namespace */
+ sk = sock->sk;
+ sk_change_net(sk, net);
+
+ rc = kernel_bind(sock, (struct sockaddr *)&lisp_addr,
+ sizeof(lisp_addr));
+ if (rc < 0) {
+ pr_debug("bind for UDP socket %pI4:%u (%d)\n",
+ &lisp_addr.sin_addr, ntohs(lisp_addr.sin_port), rc);
+ sk_release_kernel(sk);
+ return rc;
+ }
+
+ *psock = sock;
+ /* Disable multicast loopback */
+ inet_sk(sk)->mc_loop = 0;
+
+ if (!csum)
+ sock->sk->sk_no_check_tx = 1;
+ return 0;
+}
+EXPORT_SYMBOL(udpv4_create_encap_sock);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
2014-06-18 23:07 Christopher White
@ 2014-06-19 0:08 ` Tom Herbert
2014-06-19 16:31 ` Christopher White
0 siblings, 1 reply; 7+ messages in thread
From: Tom Herbert @ 2014-06-19 0:08 UTC (permalink / raw)
To: Christopher White
Cc: Linux Netdev List, Vina Ermagan (vermagan),
Lorand Jakab -X (lojakab - M SQUARED CONSULTING INC. at Cisco)
> +/* Compute source port for outgoing packet
> + * first choice to use L4 flow hash since it will spread
> + * better and maybe available from hardware
> + * secondary choice is to use jhash on the Ethernet header
> + */
> +static inline __be16 udp_tunnel_get_src_port(__u16 port_min, __u16 port_max,
> + struct sk_buff *skb)
> +{
> + unsigned int range = (port_max - port_min) + 1;
> + u32 hash;
> +
> + hash = skb_get_hash(skb);
> + if (!hash)
> + hash = jhash(skb->data, 2 * ETH_ALEN,
> + (__force u32) skb->protocol);
> +
This is probably okay for now, but we should really be smarter here.
Looks like another consumer of a TX hash in skbuf (Eric was looking to
add that I believe).
> + return htons((((u64) hash * range) >> 32) + port_min);
> +}
> +
> +/* Compute source UDP port for outgoing packets on UDP tunnels
> + */
> static inline void udp_lib_close(struct sock *sk, long timeout)
> {
> sk_common_release(sk);
> @@ -270,7 +291,8 @@ void udp4_proc_exit(void);
> #endif
>
> int udpv4_offload_init(void);
> -
> +int udpv4_create_encap_sock(struct net *net, __be16 port, struct socket **psock,
> + bool csum);
> void udp_init(void);
>
> void udp_encap_enable(void);
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index b385348..0077832 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -331,6 +331,23 @@ struct ifla_vxlan_port_range {
> __be16 high;
> };
>
> +/* LISP section */
> +enum {
> + IFLA_LISP_UNSPEC,
> + IFLA_LISP_IID,
> + IFLA_LISP_LOCAL,
> + IFLA_LISP_REMOTE,
> + IFLA_LISP_LOCAL6,
> + IFLA_LISP_REMOTE6,
> + IFLA_LISP_ENCAP_PORT,
> + IFLA_LISP_LISTEN_PORT,
> + IFLA_LISP_TOS,
> + IFLA_LISP_TTL,
> + IFLA_LISP_UDP_CSUM,
> + __IFLA_LISP_MAX
> +};
> +#define IFLA_LISP_MAX (__IFLA_LISP_MAX - 1)
> +
> /* Bonding section */
>
> enum {
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d92f94b..c69b198 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2524,78 +2524,3 @@ void __init udp_init(void)
> sysctl_udp_wmem_min = SK_MEM_QUANTUM;
> }
>
> -struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> - netdev_features_t features)
> -{
> - struct sk_buff *segs = ERR_PTR(-EINVAL);
> - u16 mac_offset = skb->mac_header;
> - int mac_len = skb->mac_len;
> - int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
> - __be16 protocol = skb->protocol;
> - netdev_features_t enc_features;
> - int udp_offset, outer_hlen;
> - unsigned int oldlen;
> - bool need_csum;
> -
> - oldlen = (u16)~skb->len;
> -
> - if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
> - goto out;
> -
> - skb->encapsulation = 0;
> - __skb_pull(skb, tnl_hlen);
> - skb_reset_mac_header(skb);
> - skb_set_network_header(skb, skb_inner_network_offset(skb));
> - skb->mac_len = skb_inner_network_offset(skb);
> - skb->protocol = htons(ETH_P_TEB);
> -
> - need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
> - if (need_csum)
> - skb->encap_hdr_csum = 1;
> -
> - /* segment inner packet. */
> - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
> - segs = skb_mac_gso_segment(skb, enc_features);
> - if (!segs || IS_ERR(segs)) {
> - skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
> - mac_len);
> - goto out;
> - }
> -
> - outer_hlen = skb_tnl_header_len(skb);
> - udp_offset = outer_hlen - tnl_hlen;
> - skb = segs;
> - do {
> - struct udphdr *uh;
> - int len;
> -
> - skb_reset_inner_headers(skb);
> - skb->encapsulation = 1;
> -
> - skb->mac_len = mac_len;
> -
> - skb_push(skb, outer_hlen);
> - skb_reset_mac_header(skb);
> - skb_set_network_header(skb, mac_len);
> - skb_set_transport_header(skb, udp_offset);
> - len = skb->len - udp_offset;
> - uh = udp_hdr(skb);
> - uh->len = htons(len);
> -
> - if (need_csum) {
> - __be32 delta = htonl(oldlen + len);
> -
> - uh->check = ~csum_fold((__force __wsum)
> - ((__force u32)uh->check +
> - (__force u32)delta));
> - uh->check = gso_make_checksum(skb, ~uh->check);
> -
> - if (uh->check == 0)
> - uh->check = CSUM_MANGLED_0;
> - }
> -
> - skb->protocol = protocol;
> - } while ((skb = skb->next));
> -out:
> - return segs;
> -}
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 546d2d4..cb77404 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -248,3 +248,121 @@ int __init udpv4_offload_init(void)
> {
> return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
> }
> +
> +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> + netdev_features_t features)
> +{
> + struct sk_buff *segs = ERR_PTR(-EINVAL);
> + u16 mac_offset = skb->mac_header;
> + int mac_len = skb->mac_len;
> + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
> + __be16 protocol = skb->protocol;
> + netdev_features_t enc_features;
> + int udp_offset, outer_hlen;
> + unsigned int oldlen;
> + bool need_csum;
> +
> + oldlen = (u16)~skb->len;
> +
> + if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
> + goto out;
> +
> + skb->encapsulation = 0;
> + __skb_pull(skb, tnl_hlen);
> + skb_reset_mac_header(skb);
> + skb_set_network_header(skb, skb_inner_network_offset(skb));
> + skb->mac_len = skb_inner_network_offset(skb);
> + skb->protocol = htons(ETH_P_TEB);
> +
> + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
> + if (need_csum)
> + skb->encap_hdr_csum = 1;
> +
> + /* segment inner packet. */
> + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
> + segs = skb_mac_gso_segment(skb, enc_features);
> + if (!segs || IS_ERR(segs)) {
> + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
> + mac_len);
> + goto out;
> + }
> +
> + outer_hlen = skb_tnl_header_len(skb);
> + udp_offset = outer_hlen - tnl_hlen;
> + skb = segs;
> + do {
> + struct udphdr *uh;
> + int len;
> +
> + skb_reset_inner_headers(skb);
> + skb->encapsulation = 1;
> +
> + skb->mac_len = mac_len;
> +
> + skb_push(skb, outer_hlen);
> + skb_reset_mac_header(skb);
> + skb_set_network_header(skb, mac_len);
> + skb_set_transport_header(skb, udp_offset);
> + len = skb->len - udp_offset;
> + uh = udp_hdr(skb);
> + uh->len = htons(len);
> +
> + if (need_csum) {
> + __be32 delta = htonl(oldlen + len);
> +
> + uh->check = ~csum_fold((__force __wsum)
> + ((__force u32)uh->check +
> + (__force u32)delta));
> + uh->check = gso_make_checksum(skb, ~uh->check);
> +
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
> + }
> +
> + skb->protocol = protocol;
> + } while ((skb = skb->next));
> +out:
> + return segs;
> +}
> +
Please split out generic changes into their own patches.
> +int udpv4_create_encap_sock(struct net *net, __be16 port, struct socket **psock,
> + bool csum)
> +{
> + struct sock *sk;
> + struct socket *sock;
> + struct sockaddr_in lisp_addr = {
> + .sin_family = AF_INET,
> + .sin_addr.s_addr = htonl(INADDR_ANY),
> + .sin_port = port,
> + };
> + int rc;
> +
Still some lisp artifacts.
> + /* Create UDP socket for encapsulation receive. */
> + rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
> + if (rc < 0) {
> + pr_debug("UDP socket create failed\n");
> + return rc;
> + }
> +
> + /* Put in proper namespace */
> + sk = sock->sk;
> + sk_change_net(sk, net);
> +
> + rc = kernel_bind(sock, (struct sockaddr *)&lisp_addr,
> + sizeof(lisp_addr));
> + if (rc < 0) {
> + pr_debug("bind for UDP socket %pI4:%u (%d)\n",
> + &lisp_addr.sin_addr, ntohs(lisp_addr.sin_port), rc);
> + sk_release_kernel(sk);
> + return rc;
> + }
> +
> + *psock = sock;
> + /* Disable multicast loopback */
> + inet_sk(sk)->mc_loop = 0;
> +
> + if (!csum)
> + sock->sk->sk_no_check_tx = 1;
> + return 0;
> +}
> +EXPORT_SYMBOL(udpv4_create_encap_sock)
I was actually thinking this function could be even more general. The
L2TP_ENCAPTYPE_UDP case in l2tp_tunnel_sock_create looks like
something we might be able to abstract out into a separate function--
it would include IPv6 support and the possibility of binding to other
than INADDR_ANY and connected sockets. l2tp_tunnel_cfg could be the
basis of udp_port_cfg which contains addresses, ports, and sockopts
like sk_no_check_tx...
> --
> 1.7.10.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
2014-06-19 0:08 ` Tom Herbert
@ 2014-06-19 16:31 ` Christopher White
2014-06-19 17:29 ` Tom Herbert
0 siblings, 1 reply; 7+ messages in thread
From: Christopher White @ 2014-06-19 16:31 UTC (permalink / raw)
To: Tom Herbert
Cc: Linux Netdev List, Vina Ermagan (vermagan),
Lorand Jakab -X (lojakab - M SQUARED CONSULTING INC. at Cisco)
Hi Tom,
Thanks again for the quick turnaround. Some responses and questions below…
On Jun 18, 2014, at 5:08 PM, Tom Herbert <therbert@google.com> wrote:
>>
>> + hash = skb_get_hash(skb);
>> + if (!hash)
>> + hash = jhash(skb->data, 2 * ETH_ALEN,
>> + (__force u32) skb->protocol);
>> +
>
> This is probably okay for now, but we should really be smarter here.
> Looks like another consumer of a TX hash in skbuf (Eric was looking to
> add that I believe).
Ok.
> >check = CSUM_MANGLED_0;
>> + }
>> +
>> + skb->protocol = protocol;
>> + } while ((skb = skb->next));
>> +out:
>> + return segs;
>> +}
>> +
> Please split out generic changes into their own patches.
>
Will do.
>> +int udpv4_create_encap_sock(struct net *net, __be16 port, struct socket **psock,
>> + bool csum)
>> +{
>> + struct sock *sk;
>> + struct socket *sock;
>> + struct sockaddr_in lisp_addr = {
>> + .sin_family = AF_INET,
>> + .sin_addr.s_addr = htonl(INADDR_ANY),
>> + .sin_port = port,
>> + };
>> + int rc;
>> +
> Still some lisp artifacts.
Sorry, that got past me, will fix.
>>
>> + }
>> +
>> + *psock = sock;
>> + /* Disable multicast loopback */
>> + inet_sk(sk)->mc_loop = 0;
>> +
>> + if (!csum)
>> + sock->sk->sk_no_check_tx = 1;
>> + return 0;
>> +}
>> +EXPORT_SYMBOL(udpv4_create_encap_sock)
>
> I was actually thinking this function could be even more general. The
> L2TP_ENCAPTYPE_UDP case in l2tp_tunnel_sock_create looks like
> something we might be able to abstract out into a separate function--
> it would include IPv6 support and the possibility of binding to other
> than INADDR_ANY and connected sockets. l2tp_tunnel_cfg could be the
> basis of udp_port_cfg which contains addresses, ports, and sockopts
> like sk_no_check_tx…
>
So I’m happy to do this, but I would like to do it in a separate submission.
For one thing, I am not super familiar with that code and I’m a bit nervous
about making substantive changes, thus it may take me some time.
As it is orthogonal to the LISP submission, would it be possible to do this
separately and have LISP move forward?
Thanks!
-Chris
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
2014-06-19 16:31 ` Christopher White
@ 2014-06-19 17:29 ` Tom Herbert
0 siblings, 0 replies; 7+ messages in thread
From: Tom Herbert @ 2014-06-19 17:29 UTC (permalink / raw)
To: Christopher White
Cc: Linux Netdev List, Vina Ermagan (vermagan),
Lorand Jakab -X (lojakab - M SQUARED CONSULTING INC. at Cisco)
>> I was actually thinking this function could be even more general. The
>> L2TP_ENCAPTYPE_UDP case in l2tp_tunnel_sock_create looks like
>> something we might be able to abstract out into a separate function--
>> it would include IPv6 support and the possibility of binding to other
>> than INADDR_ANY and connected sockets. l2tp_tunnel_cfg could be the
>> basis of udp_port_cfg which contains addresses, ports, and sockopts
>> like sk_no_check_tx…
>>
>
> So I’m happy to do this, but I would like to do it in a separate submission.
> For one thing, I am not super familiar with that code and I’m a bit nervous
> about making substantive changes, thus it may take me some time.
> As it is orthogonal to the LISP submission, would it be possible to do this
> separately and have LISP move forward?
>
Okay, I'll post an implementation of the general function once
net-next opens up.
> Thanks!
> -Chris
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
@ 2014-06-19 22:05 Christopher White
2014-06-20 4:11 ` David Miller
0 siblings, 1 reply; 7+ messages in thread
From: Christopher White @ 2014-06-19 22:05 UTC (permalink / raw)
To: Linux Netdev List
Cc: Vina Ermagan (vermagan),
Lorand Jakab -X (lojakab - M SQUARED CONSULTING INC. at Cisco)
This is a static tunnel implementation of LISP as described in RFC 6830:
http://tools.ietf.org/html/rfc6830
This driver provides point-to-point LISP dataplane
encapsulation/decapsulation for statically configured endpoints. It provides
support for IPv4 in IPv4 and IPv6 in IPv4. IPv6 outer headers are not
supported yet. Instance ID is supported on a per device basis.
This implementation has been tested against LISPMob.
Changes from V2: Move some functions to common headers. Remove unnecessary skb
ownership change. Minor cleanup.
Changes from V3: Revert some generic function consolidation for later patches.
Signed-off-by: Chris White <chris@logicalelegance.com>
---
drivers/net/Kconfig | 12 +
drivers/net/Makefile | 1 +
drivers/net/lisp.c | 899 ++++++++++++++++++++++++++++++++++++++++++
drivers/net/vxlan.c | 22 +-
include/net/route.h | 20 +
include/net/udp.h | 21 +
include/uapi/linux/if_link.h | 17 +
7 files changed, 972 insertions(+), 20 deletions(-)
create mode 100644 drivers/net/lisp.c
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 89402c3..5d49b1e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -158,6 +158,18 @@ config VXLAN
To compile this driver as a module, choose M here: the module
will be called vxlan.
+config LISP
+ tristate "Locator Identifier Separation Protocol (LISP)"
+ depends on INET
+ select NET_IP_TUNNEL
+ ---help---
+ Create a LISP virtual interface that provides static LISP tunnel
+ encapsulation. For more information see:
+ http://tools.ietf.org/html/rfc6830
+
+ To compile this driver as a module, choose M here: the module will be
+ called lisp.
+
config NETCONSOLE
tristate "Network console logging support"
---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3fef8a8..943590d 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
obj-$(CONFIG_VXLAN) += vxlan.o
obj-$(CONFIG_NLMON) += nlmon.o
+obj-$(CONFIG_LISP) += lisp.o
#
# Networking Drivers
diff --git a/drivers/net/lisp.c b/drivers/net/lisp.c
new file mode 100644
index 0000000..0265285
--- /dev/null
+++ b/drivers/net/lisp.c
@@ -0,0 +1,899 @@
+/*
+ * lisp.c
+ * This file is part of LISP Implementation.
+ * It provides a netdevice for static tunneling between LISP
+ * devices. IPv4 encapsulation is currently supported.
+ *
+ * Copyright (C) 2014 Cisco Systems, Inc, 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * Written or modified by:
+ * Chris White <chris@logicalelegance.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/rculist.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/hash.h>
+#include <linux/ethtool.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+#include <net/route.h>
+#include <net/xfrm.h>
+#include <linux/in_route.h>
+#include <linux/version.h>
+
+#define LISP_VERSION "0.1"
+
+static inline void vlan_set_tci(struct sk_buff *skb, u16 vlan_tci)
+{
+ skb->vlan_tci = vlan_tci;
+}
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+/**
+ * struct lisphdr - LISP header
+ * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
+ * @locator_status_bits_present: Flag indicating the presence of Locator Status
+ * Bits (LSB).
+ * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
+ * @map_version_present: Flag indicating the use of mapping versioning.
+ * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
+ * @reserved_flags: 3 bits reserved for future flags.
+ * @nonce: 24 bit nonce value.
+ * @map_version: 24 bit mapping version.
+ * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
+ * is not set, 8 bits when it is.
+ * @instance_id: 24 bit Instance ID
+ */
+struct lisphdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ __u8 reserved_flags : 3;
+ __u8 instance_id_present : 1;
+ __u8 map_version_present : 1;
+ __u8 solicit_echo_nonce : 1;
+ __u8 locator_status_bits_present : 1;
+ __u8 nonce_present : 1;
+#else
+ __u8 nonce_present : 1;
+ __u8 locator_status_bits_present : 1;
+ __u8 solicit_echo_nonce : 1;
+ __u8 map_version_present : 1;
+ __u8 instance_id_present : 1;
+ __u8 reserved_flags : 3;
+#endif
+ union {
+ __u8 nonce[3];
+ __u8 map_version[3];
+ } u1;
+ union {
+ __be32 locator_status_bits;
+ struct {
+ __u8 instance_id[3];
+ __u8 locator_status_bits;
+ } word2;
+ } u2;
+};
+
+#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
+
+/* UDP port for LISP traffic.
+ * The IANA assigned port is 4341.
+ */
+static unsigned short lisp_port __read_mostly = 4341;
+module_param_named(udp_port, lisp_port, ushort, 0444);
+MODULE_PARM_DESC(udp_port, "Destination UDP port");
+static int lisp_net_id;
+
+/* per-network namespace private data for this module */
+struct lisp_net {
+ struct list_head lisp_list;
+ struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock;
+};
+
+union lisp_addr {
+struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ struct sockaddr sa;
+};
+
+#define IID_HASH_BITS 10
+#define IID_HASH_SIZE (1 << IID_HASH_BITS)
+
+struct lisp_sock;
+typedef void (lisp_rcv_t)(struct lisp_sock *ls, struct sk_buff *skb);
+
+/* per UDP socket information */
+struct lisp_sock {
+ struct hlist_node hlist;
+ lisp_rcv_t *rcv;
+ void *data;
+ struct work_struct del_work;
+ struct socket *sock;
+ struct rcu_head rcu;
+ struct hlist_head iid_list[IID_HASH_SIZE];
+ atomic_t refcnt;
+};
+
+/* LISP pseudo network device */
+struct lisp_dev {
+ struct hlist_node hlist;
+ struct list_head next;
+ struct net_device *dev;
+ u32 iid; /* Instance ID */
+ struct lisp_sock *ls_socket; /* Input socket */
+ __be16 rcv_port; /* Listen port to receive packets */
+ __be16 encap_port; /* Dest port for encaped packets */
+ __u8 tos;
+ __u8 ttl;
+ u32 flags;
+ union lisp_addr remote; /* Tunnel dst (RLOC) */
+ union lisp_addr local; /* Tunnel src (our RLOC) */
+ struct work_struct sock_work;
+};
+
+#define LISP_F_UDP_CSUM 0x1
+
+static struct workqueue_struct *lisp_wq;
+
+/* Instance ID hash table head */
+static inline struct hlist_head *iid_head(struct lisp_sock *s, u32 iid)
+{
+ return &s->iid_list[hash_32(iid, IID_HASH_BITS)];
+}
+
+/* Socket hash table head */
+static inline struct hlist_head *s_head(struct net *net, __be16 port)
+{
+ struct lisp_net *ln = net_generic(net, lisp_net_id);
+
+ return &ln->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+/* Find LISP socket based on network namespace and UDP port */
+static struct lisp_sock *lisp_find_sock(struct net *net, __be16 port)
+{
+ struct lisp_sock *s;
+
+ hlist_for_each_entry_rcu(s, s_head(net, port), hlist) {
+ if (inet_sk(s->sock->sk)->inet_sport == port)
+ return s;
+ }
+ return NULL;
+}
+
+/* Find device based on IID */
+static struct lisp_dev *lisp_find_iid(struct lisp_sock *s, u32 iid)
+{
+ struct lisp_dev *lispdev;
+
+ hlist_for_each_entry_rcu(lispdev, iid_head(s, iid), hlist) {
+ if (lispdev->iid == iid)
+ return lispdev;
+ }
+ return NULL;
+}
+
+static void lisp_sock_add_dev(struct lisp_sock *s, struct lisp_dev *dev)
+{
+ __u32 iid = dev->iid;
+
+ dev->ls_socket = s;
+ hlist_add_head_rcu(&dev->hlist, iid_head(s, iid));
+}
+
+/* ndo_init handler: allocate the per-CPU statistics and attach the device
+ * to a listening socket for its receive port.
+ *
+ * Returns 0 on success or -ENOMEM when the statistics cannot be allocated.
+ */
+static int lisp_init(struct net_device *dev)
+{
+ struct lisp_dev *lispdev = netdev_priv(dev);
+ struct lisp_net *ln = net_generic(dev_net(dev), lisp_net_id);
+ struct lisp_sock *s;
+ int i;
+
+ /* Allocate stats space */
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ /* NOTE(review): netdev_alloc_pcpu_stats() may already initialise each
+ * syncp, which would make this loop redundant - confirm.
+ */
+ for_each_possible_cpu(i) {
+ struct pcpu_sw_netstats *lisp_stats;
+
+ lisp_stats = per_cpu_ptr(dev->tstats, i);
+ u64_stats_init(&lisp_stats->syncp);
+ }
+
+ /* Create port, if necessary */
+ spin_lock(&ln->sock_lock);
+ s = lisp_find_sock(dev_net(dev), lispdev->rcv_port);
+ if (s) {
+ /* Reuse the socket if it's the same port */
+ atomic_inc(&s->refcnt);
+ lisp_sock_add_dev(s, lispdev);
+ } else {
+ /* Make a new socket */
+ /* ls_socket is not assigned on this path; the queued sock_work
+ * handler (not visible here) is presumably what creates the
+ * socket and drops the device reference taken below.
+ */
+ dev_hold(dev);
+ queue_work(lisp_wq, &lispdev->sock_work);
+ }
+ spin_unlock(&ln->sock_lock);
+ return 0;
+}
+
+/* Drop one reference on a LISP socket.
+ *
+ * When the last reference goes away the socket is removed from the per-net
+ * port hash, its sk_user_data back-pointer is cleared, and the remaining
+ * teardown is handed to lisp_wq through del_work.
+ */
+void lisp_sock_release(struct lisp_sock *s)
+{
+ struct sock *sk = s->sock->sk;
+ struct net *net = sock_net(sk);
+ struct lisp_net *ln = net_generic(net, lisp_net_id);
+
+ /* Other users remain: nothing more to do */
+ if (!atomic_dec_and_test(&s->refcnt))
+ return;
+ /* Unpublish under sock_lock so lookups and insertions are serialised */
+ spin_lock(&ln->sock_lock);
+ hlist_del_rcu(&s->hlist);
+ rcu_assign_sk_user_data(s->sock->sk, NULL);
+ spin_unlock(&ln->sock_lock);
+ queue_work(lisp_wq, &s->del_work);
+}
+EXPORT_SYMBOL_GPL(lisp_sock_release);
+
+static void lisp_uninit(struct net_device *dev)
+{
+ struct lisp_dev *lispdev = netdev_priv(dev);
+ struct lisp_sock *s = lispdev->ls_socket;
+
+ if (s)
+ lisp_sock_release(s);
+ free_percpu(dev->tstats);
+}
+
+static int lisp_change_mtu(struct net_device *dev, int new_mtu)
+{
+ return eth_change_mtu(dev, new_mtu);
+}
+
+static inline struct sk_buff *lisp_handle_offloads(struct sk_buff *skb,
+ bool udp_csum)
+{
+ int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ return iptunnel_handle_offloads(skb, udp_csum, type);
+}
+
+/* Fill in the outer UDP header and the LISP header of an outgoing packet.
+ *
+ * @dev:   LISP device supplying the encap port, instance ID and socket
+ * @skb:   packet whose transport header points at the UDP + LISP header
+ *         space that the caller has already pushed
+ * @saddr: outer IPv4 source address, network byte order
+ * @daddr: outer IPv4 destination address, network byte order
+ */
+static void lisp_build_header(const struct lisp_dev *dev,
+			      struct sk_buff *skb, __be32 saddr, __be32 daddr)
+{
+	struct udphdr *udph = udp_hdr(skb);
+	struct lisphdr *lisph = (struct lisphdr *)(udph + 1);
+	struct net *net = dev_net(dev->dev);
+	__be32 iid;
+	int high, low;
+
+	udph->dest = dev->encap_port;
+
+	/* Pick the source port from the local port range */
+	inet_get_local_port_range(net, &low, &high);
+	udph->source = udp_tunnel_get_src_port(low, high, skb);
+	udph->len = htons(skb->len - skb_transport_offset(skb));
+
+	/* Flags: only Locator Status Bits and Instance ID are advertised.
+	 * Nonce, echo-nonce and map-versioning are not supported.
+	 */
+	lisph->nonce_present = 0;
+	lisph->locator_status_bits_present = 1;
+	lisph->solicit_echo_nonce = 0;
+	lisph->map_version_present = 0;
+	lisph->instance_id_present = 1;
+	lisph->reserved_flags = 0;
+
+	/* The nonce/map-version field is unused, send it as zero */
+	lisph->u1.nonce[0] = 0;
+	lisph->u1.nonce[1] = 0;
+	lisph->u1.nonce[2] = 0;
+
+	/* Shift the instance ID into the three most significant bytes of a
+	 * big-endian word and copy only those; the fourth byte of the word
+	 * carries the 8 locator status bits.
+	 */
+	iid = htonl(dev->iid << 8);
+	memcpy(&lisph->u2.word2.instance_id, &iid, 3);
+	lisph->u2.word2.locator_status_bits = 1;
+
+	/* udp_set_csum() takes the "no checksum" flag as its first argument;
+	 * passing the socket pointer would always read as true and suppress
+	 * checksumming regardless of configuration.
+	 */
+	udp_set_csum(dev->ls_socket->sock->sk->sk_no_check_tx, skb,
+		     saddr, daddr, skb->len);
+}
+
+/* Transmit local sourced packets with LISP encapsulation
+ *
+ * Only IPv4 and IPv6 payloads are encapsulated; anything else is dropped.
+ * The skb is consumed on every path and NETDEV_TX_OK is always returned;
+ * failures are reported through the device statistics only.
+ */
+static netdev_tx_t lisp_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct net *net = dev_net(lispdev->dev);
+	struct lisp_sock *s = lispdev->ls_socket;
+	struct rtable *rt;
+	int min_headroom;
+	__be32 saddr;
+	__be32 daddr;
+	int sent_len;
+	int err;
+
+	if (skb->protocol != htons(ETH_P_IP) &&
+	    skb->protocol != htons(ETH_P_IPV6)) {
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* lisp_init() may leave socket creation to a work item, so the
+	 * device can be asked to transmit before ls_socket has been set.
+	 */
+	if (unlikely(!s)) {
+		err = -ENOTCONN;
+		goto tx_free;
+	}
+
+	/* Route lookup */
+	saddr = lispdev->local.sin.sin_addr.s_addr;
+	daddr = lispdev->remote.sin.sin_addr.s_addr;
+	rt = ip_route_output_mark(net,
+				  &saddr,
+				  daddr,
+				  IPPROTO_UDP,
+				  lispdev->tos,
+				  skb->mark);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto tx_free;
+	}
+
+	/* sk_no_check_tx is set when checksums are disabled, so it has to be
+	 * negated to obtain the "use UDP checksum" flag.
+	 */
+	skb = lisp_handle_offloads(skb, !s->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb)) {
+		/* The offload helper has already released the skb */
+		err = PTR_ERR(skb);
+		skb = NULL;
+		goto err_free_rt;
+	}
+
+	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+		       + sizeof(struct iphdr) + LISP_HLEN;
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
+
+	skb_reset_inner_headers(skb);
+
+	__skb_push(skb, LISP_HLEN);
+	skb_reset_transport_header(skb);
+
+	lisp_build_header(lispdev, skb, saddr, daddr);
+
+	/* Offloading */
+	skb->ignore_df = 1;
+
+	/* DF is left clear on the outer header */
+	sent_len = iptunnel_xmit(s->sock->sk, rt, skb,
+				 saddr, daddr,
+				 IPPROTO_UDP, lispdev->tos,
+				 lispdev->ttl, 0, false);
+
+	iptunnel_xmit_stats(sent_len, &dev->stats, dev->tstats);
+	return NETDEV_TX_OK;
+
+err_free_rt:
+	ip_rt_put(rt);
+tx_free:
+	/* kfree_skb() ignores NULL, which covers the offload failure case */
+	kfree_skb(skb);
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+	return NETDEV_TX_OK;
+}
+
+/* Hand a decapsulated packet to the LISP device that owns its instance ID.
+ *
+ * @s:   LISP socket the packet arrived on
+ * @skb: packet as passed on by lisp_udp_encap_rcv(); the transport header
+ *       is still expected to locate the outer UDP header
+ *
+ * The inner protocol is taken from the IP version nibble that follows the
+ * LISP header. Packets with an unknown version or an unconfigured instance
+ * ID are dropped. The skb is consumed on every path.
+ */
+static void lisp_rcv(struct lisp_sock *s,
+		     struct sk_buff *skb)
+{
+	struct lisp_dev *lispdev;
+	struct iphdr *inner_iph;
+	struct lisphdr *lisph;
+	struct pcpu_sw_netstats *stats;
+	__be16 protocol;
+	__u32 iid = 0;
+
+	lisph = (struct lisphdr *)(udp_hdr(skb) + 1);
+	/* NOTE(review): nothing visible here guarantees that the first byte
+	 * after the LISP header is in the linear area - confirm.
+	 */
+	inner_iph = (struct iphdr *)(lisph + 1);
+	switch (inner_iph->version) {
+	case 4:
+		protocol = htons(ETH_P_IP);
+		break;
+	case 6:
+		protocol = htons(ETH_P_IPV6);
+		break;
+	default:
+		goto drop;
+	}
+
+	/* Assemble the 24-bit instance ID byte by byte rather than through
+	 * a cast of the byte array to a 32-bit pointer.
+	 */
+	if (lisph->instance_id_present) {
+		const __u8 *id = lisph->u2.word2.instance_id;
+
+		iid = (id[0] << 16) | (id[1] << 8) | id[2];
+	}
+
+	/* Find the IID in our configuration */
+	lispdev = lisp_find_iid(s, iid);
+	if (!lispdev) {
+		/* No LISP device to log against: report on the device the
+		 * packet was received on instead of dereferencing NULL.
+		 */
+		netdev_info(skb->dev, "Instance ID 0x%x not found\n", iid);
+		goto drop;
+	}
+
+	skb->protocol = protocol;
+	skb->dev = lispdev->dev;
+	skb_reset_network_header(skb);
+
+	stats = this_cpu_ptr(lispdev->dev->tstats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	netif_rx(skb);
+	return;
+drop:
+	kfree_skb(skb);
+}
+
+
+/* Callback from net/ipv4/udp.c to receive packets
+ *
+ * Returns 0 when the skb has been consumed here (delivered or dropped) and
+ * 1 when the UDP + LISP headers are not available, in which case the skb is
+ * left untouched for the caller.
+ */
+static int lisp_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct lisp_sock *s;
+
+	if (!pskb_may_pull(skb, LISP_HLEN))
+		goto error;
+
+	if (iptunnel_pull_header(skb, LISP_HLEN, 0))
+		goto drop;
+
+	/* The back-pointer is cleared when the socket is being released */
+	s = rcu_dereference_sk_user_data(sk);
+	if (!s)
+		goto drop;
+
+	/* If the NIC driver gave us an encapsulated packet
+	 * with the encapsulation mark, the device checksummed it
+	 * for us. Otherwise force the upper layers to verify it.
+	 */
+	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	     skb->ip_summed != CHECKSUM_PARTIAL) ||
+	    !skb->encapsulation)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->encapsulation = 0;
+	lisp_rcv(s, skb);
+	return 0;
+drop:
+	kfree_skb(skb);
+	return 0;
+error:
+	return 1;
+}
+
+static const struct net_device_ops lisp_netdev_ops = {
+ .ndo_init = lisp_init,
+ .ndo_uninit = lisp_uninit,
+ .ndo_start_xmit = lisp_xmit,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_change_mtu = lisp_change_mtu
+};
+
+/* Info for udev */
+static struct device_type lisp_type = {
+ .name = "lisp",
+};
+
+static void lisp_del_work(struct work_struct *work)
+{
+ struct lisp_sock *ls = container_of(work, struct lisp_sock, del_work);
+
+ sk_release_kernel(ls->sock->sk);
+ kfree_rcu(ls, rcu);
+}
+
+/* Open a kernel UDP socket bound to INADDR_ANY:@port in namespace @net.
+ *
+ * On success the socket is stored in *@psock and 0 is returned; multicast
+ * loopback is switched off and, when @csum is false, sk_no_check_tx is set.
+ * On failure the negative error from socket creation or bind is returned
+ * and *@psock is not written.
+ */
+static int create_v4_encap_sock(struct net *net, __be16 port,
+ struct socket **psock,
+ bool csum)
+{
+ struct sock *sk;
+ struct socket *sock;
+ struct sockaddr_in lisp_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = port,
+ };
+ int rc;
+
+ /* Create UDP socket for encapsulation receive. */
+ rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (rc < 0) {
+ pr_debug("UDP socket create failed\n");
+ return rc;
+ }
+
+ /* Put in proper namespace */
+ sk = sock->sk;
+ sk_change_net(sk, net);
+
+ rc = kernel_bind(sock, (struct sockaddr *)&lisp_addr,
+ sizeof(lisp_addr));
+ if (rc < 0) {
+ pr_debug("bind for UDP socket %pI4:%u (%d)\n",
+ &lisp_addr.sin_addr, ntohs(lisp_addr.sin_port), rc);
+ /* Release the half-configured socket before reporting the error */
+ sk_release_kernel(sk);
+ return rc;
+ }
+
+ *psock = sock;
+ /* Disable multicast loopback */
+ inet_sk(sk)->mc_loop = 0;
+
+ /* Checksums not requested: mark the socket to skip them on transmit */
+ if (!csum)
+ sock->sk->sk_no_check_tx = 1;
+ return 0;
+}
+
+/* Allocate a lisp_sock, open its UDP listen socket on @port in @net and
+ * register it in the per-namespace socket hash.
+ *
+ * The new lisp_sock starts with a reference count of 1 and is published
+ * through the socket's sk_user_data before UDP encapsulation receive is
+ * switched on. Returns the lisp_sock or an ERR_PTR() on failure.
+ */
+static struct lisp_sock *lisp_socket_create(struct net *net, __be16 port,
+					    lisp_rcv_t *rcv, void *data,
+					    u32 flags)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_sock *ls;
+	struct socket *sock;
+	struct sock *sk;
+	unsigned int i;
+	int err;
+
+	ls = kmalloc(sizeof(*ls), GFP_KERNEL);
+	if (!ls)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < IID_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&ls->iid_list[i]);
+	INIT_WORK(&ls->del_work, lisp_del_work);
+
+	err = create_v4_encap_sock(net, port, &sock, flags & LISP_F_UDP_CSUM);
+	if (err < 0) {
+		kfree(ls);
+		return ERR_PTR(err);
+	}
+	sk = sock->sk;
+
+	ls->sock = sock;
+	ls->rcv = rcv;
+	ls->data = data;
+	atomic_set(&ls->refcnt, 1);
+	rcu_assign_sk_user_data(sk, ls);
+
+	spin_lock(&ln->sock_lock);
+	hlist_add_head_rcu(&ls->hlist, s_head(net, port));
+	spin_unlock(&ln->sock_lock);
+
+	/* Route incoming datagrams on this socket to lisp_udp_encap_rcv() */
+	udp_sk(sk)->encap_type = 1;
+	udp_sk(sk)->encap_rcv = lisp_udp_encap_rcv;
+	udp_encap_enable();
+
+	return ls;
+}
+
+/* Obtain a LISP socket for @port in @net.
+ *
+ * A new socket is created first. If that fails, an already registered
+ * socket on the same port is reused, provided it has the same @rcv
+ * handler; its reference count is then bumped.
+ *
+ * Returns the lisp_sock, ERR_PTR(-EBUSY) when the existing socket uses a
+ * different handler, or ERR_PTR(-EINVAL) when no socket could be found.
+ */
+struct lisp_sock *lisp_sock_add(struct net *net, __be16 port, lisp_rcv_t *rcv,
+				void *data, u32 flags)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_sock *ls;
+
+	ls = lisp_socket_create(net, port, rcv, data, flags);
+	if (!IS_ERR(ls))
+		return ls;
+
+	/* Creation failed: try to share a socket already open on the port */
+	spin_lock(&ln->sock_lock);
+	ls = lisp_find_sock(net, port);
+	if (!ls)
+		ls = ERR_PTR(-EINVAL);
+	else if (ls->rcv != rcv)
+		ls = ERR_PTR(-EBUSY);
+	else
+		atomic_inc(&ls->refcnt);
+	spin_unlock(&ln->sock_lock);
+
+	return ls;
+}
+
+/* Work handler scheduled at device creation: bind the device to a LISP
+ * socket on its receive port. A failure to obtain a socket is silently
+ * ignored; the device is simply not attached to any socket.
+ */
+static void lisp_sock_work(struct work_struct *work)
+{
+	struct lisp_dev *lispdev;
+	struct lisp_sock *ls;
+	struct lisp_net *ln;
+	struct net *net;
+
+	lispdev = container_of(work, struct lisp_dev, sock_work);
+	net = dev_net(lispdev->dev);
+	ln = net_generic(net, lisp_net_id);
+
+	ls = lisp_sock_add(net, lispdev->rcv_port, lisp_rcv, NULL,
+			   lispdev->flags);
+
+	spin_lock(&ln->sock_lock);
+	if (!IS_ERR(ls))
+		lisp_sock_add_dev(ls, lispdev);
+	spin_unlock(&ln->sock_lock);
+
+	/* Presumably pairs with a dev_hold() taken when the work was queued */
+	dev_put(lispdev->dev);
+}
+
+/* rtnl_link_ops setup callback: fill in defaults for a freshly allocated
+ * LISP net_device and its private lisp_dev.
+ */
+static void lisp_setup(struct net_device *dev)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+
+	/* Link-layer identity */
+	dev->type = ARPHRD_NONE;
+	dev->flags = IFF_NOARP;
+	dev->addr_len = 4;
+	dev->tx_queue_len = 0;
+
+	/* Headroom and MTU both budget for the LISP header plus 4 bytes */
+	dev->needed_headroom = LL_MAX_HEADER + sizeof(struct lisphdr) + 4;
+	dev->mtu = ETH_DATA_LEN - sizeof(struct lisphdr) - 4;
+
+	dev->netdev_ops = &lisp_netdev_ops;
+	dev->destructor = free_netdev;
+	SET_NETDEV_DEVTYPE(dev, &lisp_type);
+
+	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+			    NETIF_F_GSO_SOFTWARE;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_NETNS_LOCAL |
+			 NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE;
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+	/* Private state: default listen port, kept in network byte order */
+	lispdev->dev = dev;
+	lispdev->rcv_port = htons(lisp_port);
+	INIT_LIST_HEAD(&lispdev->next);
+	INIT_WORK(&lispdev->sock_work, lisp_sock_work);
+}
+
+/* Netlink validation policy for the IFLA_LISP_* link attributes */
+static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
+	[IFLA_LISP_IID]		= { .type = NLA_U32 },
+	[IFLA_LISP_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_LISP_LOCAL6]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_LISP_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_LISP_REMOTE6]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_LISP_ENCAP_PORT]	= { .type = NLA_U16 },
+	[IFLA_LISP_LISTEN_PORT]	= { .type = NLA_U16 },
+	[IFLA_LISP_TOS]		= { .type = NLA_U8 },
+	[IFLA_LISP_TTL]		= { .type = NLA_U8 },
+	/* lisp_newlink() reads this with nla_get_u8(), so its length must
+	 * be validated like every other attribute.
+	 */
+	[IFLA_LISP_UDP_CSUM]	= { .type = NLA_U8 },
+};
+
+/* rtnl_link_ops newlink callback: apply the IFLA_LISP_* attributes found in
+ * @data to the device's lisp_dev, register the net_device and add it to the
+ * per-namespace device list.
+ *
+ * Attributes that are absent leave the corresponding lisp_dev field as it
+ * was. Returns 0 on success or the error from register_netdevice().
+ */
+static int lisp_newlink(struct net *net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	int err;
+
+	if (data[IFLA_LISP_IID])
+		lispdev->iid = nla_get_be32(data[IFLA_LISP_IID]);
+
+	if (data[IFLA_LISP_LOCAL]) {
+		lispdev->local.sin.sin_addr.s_addr =
+			nla_get_be32(data[IFLA_LISP_LOCAL]);
+		lispdev->local.sa.sa_family = AF_INET;
+	}
+
+	/* NOTE(review): lisp_fill_info() reports encap_port with
+	 * nla_put_be16() while it is byte-swapped here; confirm which byte
+	 * order the transmit path expects before changing either side.
+	 */
+	if (data[IFLA_LISP_ENCAP_PORT])
+		lispdev->encap_port =
+			ntohs(nla_get_be16(data[IFLA_LISP_ENCAP_PORT]));
+
+	/* rcv_port is kept in network byte order: lisp_setup() stores
+	 * htons(lisp_port) and lisp_sock_work() binds it as a __be16, so
+	 * the attribute must be stored without swapping.
+	 */
+	if (data[IFLA_LISP_LISTEN_PORT])
+		lispdev->rcv_port = nla_get_be16(data[IFLA_LISP_LISTEN_PORT]);
+
+	if (data[IFLA_LISP_REMOTE]) {
+		lispdev->remote.sin.sin_addr.s_addr =
+			nla_get_be32(data[IFLA_LISP_REMOTE]);
+		lispdev->remote.sa.sa_family = AF_INET;
+	}
+
+	if (data[IFLA_LISP_TOS])
+		lispdev->tos = nla_get_u8(data[IFLA_LISP_TOS]);
+
+	if (data[IFLA_LISP_TTL])
+		lispdev->ttl = nla_get_u8(data[IFLA_LISP_TTL]);
+
+	if (data[IFLA_LISP_UDP_CSUM] && nla_get_u8(data[IFLA_LISP_UDP_CSUM]))
+		lispdev->flags |= LISP_F_UDP_CSUM;
+
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
+	list_add(&lispdev->next, &ln->lisp_list);
+	return 0;
+}
+
+/* rtnl_link_ops dellink callback: detach the device from the driver's lists
+ * and queue it on @head for unregistration.
+ */
+static void lisp_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct lisp_dev *lispdev = netdev_priv(dev);
+	struct lisp_net *ln = net_generic(dev_net(dev), lisp_net_id);
+
+	/* Only unlink from the hash list if the device was ever hashed */
+	spin_lock(&ln->sock_lock);
+	if (!hlist_unhashed(&lispdev->hlist))
+		hlist_del_rcu(&lispdev->hlist);
+	spin_unlock(&ln->sock_lock);
+
+	/* Drop it from the per-namespace device list */
+	list_del(&lispdev->next);
+
+	unregister_netdevice_queue(dev, head);
+}
+
+/* rtnl_link_ops get_size callback: netlink space reserved for a device's
+ * IFLA_LISP_* attributes. @dev is not consulted; the size is constant.
+ */
+static size_t lisp_get_size(const struct net_device *dev)
+{
+	size_t size = 0;
+
+	size += nla_total_size(4);			 /* IFLA_LISP_IID */
+	size += nla_total_size(4);			 /* IFLA_LISP_LOCAL */
+	size += nla_total_size(sizeof(struct in6_addr)); /* IFLA_LISP_LOCAL6 */
+	size += nla_total_size(4);			 /* IFLA_LISP_REMOTE */
+	size += nla_total_size(sizeof(struct in6_addr)); /* IFLA_LISP_REMOTE6 */
+	size += nla_total_size(2);			 /* IFLA_LISP_ENCAP_PORT */
+	size += nla_total_size(2);			 /* IFLA_LISP_LISTEN_PORT */
+	size += nla_total_size(1);			 /* IFLA_LISP_TOS */
+	size += nla_total_size(1);			 /* IFLA_LISP_TTL */
+	size += nla_total_size(1);			 /* IFLA_LISP_UDP_CSUM */
+
+	return size;
+}
+
+/* rtnl_link_ops fill_info callback: dump the device's LISP configuration
+ * into @skb as IFLA_LISP_* attributes. The IPv6 attributes are not emitted
+ * yet. Returns 0, or -EMSGSIZE as soon as one attribute does not fit.
+ */
+static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	const struct lisp_dev *lispdev = netdev_priv(dev);
+
+	if (nla_put_u32(skb, IFLA_LISP_IID, lispdev->iid))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_LISP_LOCAL,
+			lispdev->local.sin.sin_addr.s_addr))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_LISP_REMOTE,
+			lispdev->remote.sin.sin_addr.s_addr))
+		goto nla_put_failure;
+	if (nla_put_be16(skb, IFLA_LISP_ENCAP_PORT, lispdev->encap_port))
+		goto nla_put_failure;
+	if (nla_put_be16(skb, IFLA_LISP_LISTEN_PORT, lispdev->rcv_port))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_LISP_TOS, lispdev->tos))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_LISP_TTL, lispdev->ttl))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_LISP_UDP_CSUM,
+		       !!(lispdev->flags & LISP_F_UDP_CSUM)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* rtnl_link_ops validate callback: performs no checks and accepts every
+ * attribute set.
+ */
+static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	return 0;
+}
+
+/* rtnetlink glue for link kind "lisp" */
+static struct rtnl_link_ops lisp_link_ops __read_mostly = {
+	.kind		= "lisp",
+	.maxtype	= IFLA_LISP_MAX,
+	.policy		= lisp_policy,
+	.priv_size	= sizeof(struct lisp_dev),
+	.setup		= lisp_setup,
+	.validate	= lisp_validate,
+	.newlink	= lisp_newlink,
+	.dellink	= lisp_dellink,
+	.get_size	= lisp_get_size,
+	.fill_info	= lisp_fill_info,
+};
+
+/* Per-namespace exit: unregister every LISP device still on the
+ * namespace's list, batched under a single rtnl_lock() section.
+ */
+static __net_exit void lisp_exit_net(struct net *net)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	struct lisp_dev *lispdev;
+	LIST_HEAD(kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(lispdev, &ln->lisp_list, next)
+		unregister_netdevice_queue(lispdev->dev, &kill_list);
+	unregister_netdevice_many(&kill_list);
+	rtnl_unlock();
+}
+
+/* Per-namespace init: set up the empty device list, the socket lock and
+ * the port-indexed socket hash. Always returns 0.
+ */
+static __net_init int lisp_init_net(struct net *net)
+{
+	struct lisp_net *ln = net_generic(net, lisp_net_id);
+	unsigned int bucket;
+
+	spin_lock_init(&ln->sock_lock);
+	INIT_LIST_HEAD(&ln->lisp_list);
+	for (bucket = 0; bucket < PORT_HASH_SIZE; bucket++)
+		INIT_HLIST_HEAD(&ln->sock_list[bucket]);
+
+	return 0;
+}
+
+/* Per-network-namespace hooks; the core allocates a struct lisp_net for
+ * each namespace and records its slot in lisp_net_id.
+ */
+static struct pernet_operations lisp_net_ops = {
+	.init	= lisp_init_net,
+	.exit	= lisp_exit_net,
+	.id	= &lisp_net_id,
+	.size	= sizeof(struct lisp_net),
+};
+
+/* Module init: create the driver workqueue, then register the per-namespace
+ * operations and the "lisp" rtnetlink link type. Earlier steps are undone
+ * in reverse order when a later one fails.
+ */
+static int __init lisp_netdev_init(void)
+{
+	int err;
+
+	lisp_wq = alloc_workqueue("lisp", 0, 0);
+	if (!lisp_wq)
+		return -ENOMEM;
+
+	err = register_pernet_device(&lisp_net_ops);
+	if (err)
+		goto err_destroy_wq;
+
+	err = rtnl_link_register(&lisp_link_ops);
+	if (err)
+		goto err_unregister_pernet;
+
+	return 0;
+
+err_unregister_pernet:
+	unregister_pernet_device(&lisp_net_ops);
+err_destroy_wq:
+	destroy_workqueue(lisp_wq);
+	return err;
+}
+
+/* Module exit: unregister the link type, destroy the workqueue, drop the
+ * per-namespace operations, then wait for outstanding RCU callbacks.
+ */
+static void __exit lisp_netdev_cleanup(void)
+{
+	rtnl_link_unregister(&lisp_link_ops);
+	destroy_workqueue(lisp_wq);
+	unregister_pernet_device(&lisp_net_ops);
+
+	/* Callbacks queued with kfree_rcu() must finish before unload */
+	rcu_barrier();
+}
+
+late_initcall(lisp_netdev_init);
+module_exit(lisp_netdev_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(LISP_VERSION);
+MODULE_AUTHOR("Chris White <chris@logicalelegance.com>");
+MODULE_ALIAS_RTNL_LINK("lisp");
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ade33ef..c04cce8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1570,25 +1570,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-/* Compute source port for outgoing packet
- * first choice to use L4 flow hash since it will spread
- * better and maybe available from hardware
- * secondary choice is to use jhash on the Ethernet header
- */
-__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
-{
- unsigned int range = (port_max - port_min) + 1;
- u32 hash;
-
- hash = skb_get_hash(skb);
- if (!hash)
- hash = jhash(skb->data, 2 * ETH_ALEN,
- (__force u32) skb->protocol);
-
- return htons((((u64) hash * range) >> 32) + port_min);
-}
-EXPORT_SYMBOL_GPL(vxlan_src_port);
-
static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
bool udp_csum)
{
@@ -1807,7 +1788,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
- src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);
+ src_port = udp_tunnel_get_src_port(vxlan->port_min, vxlan->port_max,
+ skb);
if (dst->sa.sa_family == AF_INET) {
memset(&fl4, 0, sizeof(fl4));
diff --git a/include/net/route.h b/include/net/route.h
index b17cf28..ff55ac5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -131,6 +131,26 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
return ip_route_output_key(net, &fl4);
}
+/* Resolve an IPv4 output route for @daddr, keyed additionally on protocol,
+ * TOS and @skb_mark.
+ *
+ * @saddr is in/out: its value seeds the lookup and it is overwritten with
+ * the source address held in the flow key after the lookup. Returns
+ * whatever ip_route_output_key() returns.
+ */
+static inline struct rtable *ip_route_output_mark(struct net *net,
+						  __be32 *saddr, __be32 daddr,
+						  u8 ipproto, u8 tos,
+						  u32 skb_mark)
+{
+	struct flowi4 fl4 = {
+		.daddr		= daddr,
+		.saddr		= *saddr,
+		/* Tunnel configuration keeps the DSCP part of the TOS bits,
+		 * but the Linux router expects RT_TOS bits only.
+		 */
+		.flowi4_tos	= RT_TOS(tos),
+		.flowi4_mark	= skb_mark,
+		.flowi4_proto	= ipproto,
+	};
+	struct rtable *rt = ip_route_output_key(net, &fl4);
+
+	*saddr = fl4.saddr;
+	return rt;
+}
+
+
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
struct sock *sk,
__be32 daddr, __be32 saddr,
diff --git a/include/net/udp.h b/include/net/udp.h
index 68a1fef..99861bd 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,6 +167,27 @@ static inline void udp_lib_hash(struct sock *sk)
void udp_lib_unhash(struct sock *sk);
void udp_lib_rehash(struct sock *sk, u16 new_hash);
+/* Compute the source UDP port for an outgoing packet on a UDP tunnel.
+ *
+ * First choice is the L4 flow hash, since it spreads better and may be
+ * available from hardware. If no flow hash is available, fall back to
+ * jhash over the first 2 * ETH_ALEN bytes at skb->data (the Ethernet
+ * addresses when the payload is an Ethernet frame), seeded with
+ * skb->protocol.
+ *
+ * The hash is scaled into [@port_min, @port_max] (host byte order) and the
+ * result is returned in network byte order.
+ */
+static inline __be16 udp_tunnel_get_src_port(__u16 port_min, __u16 port_max,
+					     struct sk_buff *skb)
+{
+	unsigned int range = (port_max - port_min) + 1;
+	u32 hash;
+
+	hash = skb_get_hash(skb);
+	if (!hash)
+		hash = jhash(skb->data, 2 * ETH_ALEN,
+			     (__force u32) skb->protocol);
+
+	/* Multiply-shift maps the 32-bit hash uniformly onto the range */
+	return htons((((u64) hash * range) >> 32) + port_min);
+}
+
static inline void udp_lib_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b385348..0077832 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -331,6 +331,23 @@ struct ifla_vxlan_port_range {
__be16 high;
};
+/* LISP section: netlink attributes of a "lisp" link */
+enum {
+	IFLA_LISP_UNSPEC,
+	IFLA_LISP_IID,		/* u32: instance ID */
+	IFLA_LISP_LOCAL,	/* IPv4 local address */
+	IFLA_LISP_REMOTE,	/* IPv4 remote address */
+	IFLA_LISP_LOCAL6,	/* struct in6_addr local address */
+	IFLA_LISP_REMOTE6,	/* struct in6_addr remote address */
+	IFLA_LISP_ENCAP_PORT,	/* u16: encapsulation port */
+	IFLA_LISP_LISTEN_PORT,	/* u16: listen port */
+	IFLA_LISP_TOS,		/* u8 */
+	IFLA_LISP_TTL,		/* u8 */
+	IFLA_LISP_UDP_CSUM,	/* u8: non-zero enables UDP checksums */
+	__IFLA_LISP_MAX
+};
+#define IFLA_LISP_MAX (__IFLA_LISP_MAX - 1)
+
+
/* Bonding section */
enum {
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
2014-06-19 22:05 [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol Christopher White
@ 2014-06-20 4:11 ` David Miller
2014-06-20 4:18 ` Chris White
0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2014-06-20 4:11 UTC (permalink / raw)
To: chris; +Cc: netdev, vermagan, lojakab
From: Christopher White <chris@logicalelegance.com>
Date: Thu, 19 Jun 2014 15:05:53 -0700
> +static inline struct rtable *ip_route_output_mark(struct net *net,
> + __be32 *saddr, __be32 daddr,
> + u8 ipproto, u8 tos, u32 skb_mark)
...
> +static inline __be16 udp_tunnel_get_src_port(__u16 port_min, __u16 port_max,
> + struct sk_buff *skb)
These are not indented properly.
When a declaration, definition, or invocation of a function spans
multiple lines, the arguments on the second and subsequent lines
must begin at the first column after the opening parenthesis on
the first line.
You must use the appropriate number of TAB and SPACE characters
necessary to achieve this. If you are indenting purely with
TAB characters, you are doing it wrong.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol
2014-06-20 4:11 ` David Miller
@ 2014-06-20 4:18 ` Chris White
0 siblings, 0 replies; 7+ messages in thread
From: Chris White @ 2014-06-20 4:18 UTC (permalink / raw)
To: David Miller
Cc: netdev@vger.kernel.org, vermagan@cisco.com, lojakab@cisco.com
Thanks David, fixing this now.
-Chris
> On Jun 19, 2014, at 9:11 PM, David Miller <davem@davemloft.net> wrote:
>
> From: Christopher White <chris@logicalelegance.com>
> Date: Thu, 19 Jun 2014 15:05:53 -0700
>
>> +static inline struct rtable *ip_route_output_mark(struct net *net,
>> + __be32 *saddr, __be32 daddr,
>> + u8 ipproto, u8 tos, u32 skb_mark)
> ...
>> +static inline __be16 udp_tunnel_get_src_port(__u16 port_min, __u16 port_max,
>> + struct sk_buff *skb)
>
> These are not indented properly.
>
> When a declaration, definition, or invocation of a function spans
> multiple lines, the arguments on the second and subsequent lines
> must begin at the first column after the opening parenthesis on
> the first line.
>
> You must use the appropriate number of TAB and SPACE characters
> necessary to achieve this. If you are indenting purely with
> TAB characters, you are doing it wrong.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2014-06-20 4:18 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-06-19 22:05 [PATCH V3 net-next] LISP: Locator/Identifier Separation Protocol Christopher White
2014-06-20 4:11 ` David Miller
2014-06-20 4:18 ` Chris White
-- strict thread matches above, loose matches on Subject: below --
2014-06-18 23:07 Christopher White
2014-06-19 0:08 ` Tom Herbert
2014-06-19 16:31 ` Christopher White
2014-06-19 17:29 ` Tom Herbert
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox